class TestEtcd(unittest.TestCase):
    """Exercise the Etcd wrapper's key and directory operations."""

    def setUp(self):
        """Attach a stdout log handler and build the Etcd client under test."""
        logger = logging.getLogger()
        stream = logging.StreamHandler(sys.stdout)
        stream.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        stream.setFormatter(formatter)
        logger.addHandler(stream)
        # setUp runs before every test; without removing the handler the root
        # logger collects one more handler per test and every message is
        # printed repeatedly. addCleanup also runs if the rest of setUp fails.
        self.addCleanup(logger.removeHandler, stream)
        self.etcd = Etcd(logger)

    def test_a_setkey(self):
        ret = self.etcd.set_key('message', 'Hello World')
        self.assertTrue(ret)

    def test_b_getkey(self):
        self.etcd.set_key('message', 'Hello World')
        text = self.etcd.get_key('message')
        self.assertEqual(text, 'Hello World')

    def test_c_deletekey(self):
        # Set the key first before deleting it
        self.etcd.set_key('message', 'Hello World')
        text = self.etcd.delete_key('message')
        # Every fragment is a raw string so that \d is a regex escape and not
        # an (invalid) string escape; the resulting pattern is unchanged.
        regex = re.compile(
            r'{"action":"delete","node":{"key":"/message",'
            r'"modifiedIndex":\d+,"createdIndex":\d+},"prevNode":{"key":"/message"'
            r',"value":"Hello World","modifiedIndex":\d+,"createdIndex":\d+}}')
        # assertRegexpMatches is the older spelling and is gone from recent
        # Python versions; prefer assertRegex when the TestCase offers it.
        assert_regex = getattr(self, 'assertRegex', None)
        if assert_regex is None:
            assert_regex = self.assertRegexpMatches
        assert_regex(text, regex)

    def test_d_directorylist(self):
        # List a directory in Etcd
        dir_list = self.etcd.list_directory('formations/cholcomb')
        self.assertIsInstance(dir_list, list)
class VerifyFormations(object):
    """Verify that the applications of every formation stored in etcd are up.

    Container state is queried and changed through salt ``cmd.run`` calls on
    the application's host server; cron inside a container is checked over
    ssh.
    """

    def __init__(self, manager, logger):
        """Store the collaborators and create the salt and etcd clients.

        manager -- object providing load_formation_from_etcd,
                   start_application, bootstrap_application,
                   get_docker_cluster and order_cluster_by_load
        logger  -- logger used for all progress and error reporting
        """
        self.logger = logger
        self.salt_client = salt.client.LocalClient()
        self.manager = manager
        self.etcd = Etcd(logger)

    def start_verifying(self):
        """Load every formation from etcd and verify each of its apps."""
        # TODO Use background salt jobs
        # Ask salt to do these things for me and give me back an job_id
        # results = self.salt_client.cmd_async(host, 'cmd.run',
        #   ['netstat -an | grep %s | grep tcp | grep -i listen' % port],
        #   expr_form='list')
        #
        # salt-run jobs.lookup_jid <job id number>
        for formation in self._load_all_formations():
            for app in formation.application_list:
                self._verify_application(app)

    def _load_all_formations(self):
        """Return the formations found under the 'formations' etcd directory.

        Directory entries that do not look like
        /formations/<username>/<formation_name> are logged and skipped.
        """
        # Local import: the parser elements are used at module level, but the
        # exception class may not be imported there.
        from pyparsing import ParseException

        # Parse out the username and formation name
        # from the ETCD directory string
        formation_parser = Literal('/formations/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('username') + \
            Literal('/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('formation_name')

        formation_list = []
        user_list = self.etcd.list_directory('formations')
        for user in user_list or []:
            # list_directory can come back empty/None (the caller already
            # guards the top-level listing), so guard the per-user one too.
            formations = self.etcd.list_directory(user)
            for formation in formations or []:
                try:
                    parse_results = formation_parser.parseString(formation)
                except ParseException:
                    # parseString raises on a mismatch rather than returning
                    # an empty result.
                    parse_results = None
                if not parse_results:
                    self.logger.error("Could not parse the ETCD string")
                    continue
                formation_name = parse_results['formation_name']
                username = parse_results['username']
                self.logger.info(
                    'Attempting to load formation: {formation_name} '
                    'with username: {username}'.format(
                        formation_name=formation_name, username=username))
                formation_list.append(
                    self.manager.load_formation_from_etcd(
                        username, formation_name))
        return formation_list

    def _verify_application(self, app):
        """Check one app's container via salt and repair it if needed."""
        # Check to make sure it's up and running
        self.logger.info(
            "Running verification on app: "
            "{app_name}".format(app_name=app.hostname))
        self.logger.info(
            '{server} docker ps | grep {container_id}'.format(
                server=app.host_server, container_id=app.container_id))
        results = self.salt_client.cmd(
            app.host_server, 'cmd.run',
            ['docker ps | grep {container_id}'.format(
                container_id=app.container_id)],
            expr_form='list')

        if not results:
            # Nothing came back from the host at all: move the container
            self.logger.error(
                "Call out to server {server} failed. Moving it"
                .format(server=app.host_server))
            self.move_application(app)
            return

        self.logger.debug(
            "Salt return: {docker_results}".format(
                docker_results=results[app.host_server]))
        if results[app.host_server] == "":
            self.logger.error(
                "App {app} is not running!".format(app=app.hostname))
            # Start the app back up and run start.sh on there
            self.start_application(app)
        else:
            self.logger.info(
                "App {app} is running.  Checking if "
                "cron is running also".format(app=app.hostname))
            # Log in with ssh and check if cron is up and running,
            # bringing it back up if needed
            self.logger.info(
                "Sleeping 2 seconds while the container starts")
            time.sleep(2)
            self.check_running_application(app)

    # Start an application that isn't running
    def start_application(self, app):
        """Run 'docker start' for the app and react to the outcome.

        A missing container is recreated and bootstrapped through the
        manager; a failed start or a silent host moves the app elsewhere;
        otherwise the running container is checked for cron.
        """
        self.logger.info(
            "Starting app {app} with docker id: {app_id} up".format(
                app=app.hostname, app_id=app.container_id))
        results = self.salt_client.cmd(
            app.host_server, 'cmd.run',
            ['docker start {container_id}'.format(
                container_id=app.container_id)],
            expr_form='list')
        self.logger.debug(results)

        if not results:
            # Move the container to another host, this host is messed up
            self.logger.error(
                "Failed to start {container_id} on host {host}".format(
                    container_id=app.container_id, host=app.host_server))
            self.move_application(app)
            return

        output = results[app.host_server]
        if "Error: No such container" in output:
            # We need to recreate the container
            self.logger.error("Container is missing on the host!. "
                              "Trying to recreate")
            self.manager.start_application(app)
            self.logger.info(
                "Sleeping 2 seconds while the container starts")
            time.sleep(2)
            self.manager.bootstrap_application(app)
        elif "Error: start: No such container:" in output:
            # Seems the container already exists but won't start.  Bug?
            self.logger.error("Container failed to start")
            self.move_application(app)
        else:
            self.logger.info(
                "Waiting 2 seconds for docker to start the container")
            time.sleep(2)
            self.check_running_application(app)

    @staticmethod
    def _next_host(hostnames, current):
        """Return the host after `current` in `hostnames`, wrapping around.

        Returns the first host when `current` is not listed and None when
        `hostnames` is empty.
        """
        if not hostnames:
            return None
        if current in hostnames:
            following = (hostnames.index(current) + 1) % len(hostnames)
            return hostnames[following]
        return hostnames[0]

    # Move an application to another host
    def move_application(self, app):
        """Point the app at the next docker host by load and start it there.

        NOTE(review): start_application calls back into this method on
        failure, so a cluster where every host fails recurses without bound.
        """
        old_host = app.host_server
        cluster_list = self.manager.get_docker_cluster()
        if not cluster_list:
            self.logger.error(
                "No docker cluster available to move app {app_name} to".format(
                    app_name=app.hostname))
            return

        # The ordered entries expose .hostname; compare hostnames with
        # hostnames so the current host is actually found and skipped.
        ordered = self.manager.order_cluster_by_load(cluster_list)
        new_host = self._next_host(
            [load.hostname for load in ordered], old_host)
        if new_host is None:
            self.logger.error(
                "No docker host reported its load; cannot move app "
                "{app_name}".format(app_name=app.hostname))
            return

        app.host_server = new_host
        self.logger.info(
            "Moving app {app_name} from {old_host} to {new_host}".format(
                app_name=app.hostname, old_host=old_host,
                new_host=app.host_server))
        self.logger.info("Bootstrapping the application on the new host")
        self.start_application(app)

    # Log into the application via ssh and check everything
    def check_running_application(self, app):
        """SSH into the app and run /root/start.sh if cron is not running."""
        # TODO
        # Use the docker top command to see if cron is running instead of using ssh
        ssh = paramiko.SSHClient()
        try:
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            # Move this user/pass into a config file
            self.logger.info('SSHing into host {hostname}:{port}'.format(
                hostname=app.host_server, port=app.ssh_port))
            ssh.connect(hostname=app.host_server, port=app.ssh_port,
                        username='******', password='******')
            # Is cron running?
            # If not run start.sh
            stdin, stdout, stderr = ssh.exec_command("pgrep cron")
            output = stdout.readlines()
            self.logger.debug(output)

            if len(output) == 0:
                # cron isn't running
                self.logger.info("Cron is not running.  Starting it back up")
                stdin, stdout, stderr = ssh.exec_command("/root/start.sh")
            else:
                self.logger.info("Cron is running.")
        except SSHException:
            self.logger.error("Failed to log into server.")
        finally:
            # Close on every path so a failed command does not leak the
            # connection.
            ssh.close()
class Manager(object):
    """
    A manager to orchestrate the creation and deletion of container clusters
    """

    def __init__(self, logger):
        """Create the salt and etcd clients and the formation-key parser."""
        self.salt_client = salt.client.LocalClient()
        self.etcd = Etcd(logger)
        self.logger = logger
        # Parse out the username and formation name
        # from the ETCD directory string
        self.formation_parser = Literal('/formations/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('username') + \
            Literal('/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('formation_name')

    @staticmethod
    def _strip_docker_warnings(output):
        """Return `output` without lines starting with WARNING, stripped.

        Docker may print kernel capability warnings ahead of the container
        id; the wording differs between versions, so whole warning lines are
        dropped instead of matching one exact message.
        """
        kept = [line for line in output.splitlines()
                if not line.lstrip().startswith('WARNING')]
        return '\n'.join(kept).strip()

    def fqdn_to_shortname(self, fqdn):
        """Return the part of `fqdn` before the first dot."""
        if '.' in fqdn:
            return fqdn.split('.')[0]
        return fqdn

    def check_salt_key_used(self, hostname):
        """Return True if `hostname` appears in the output of 'salt-key'."""
        self.logger.info(
            "Checking if the key for {host} is already used".format(
                host=hostname))
        s = subprocess.Popen(['salt-key'], stdout=subprocess.PIPE)
        salt_list = s.communicate()[0]
        # communicate() yields bytes on Python 3; decode so the substring
        # test against a text hostname works on both major versions.
        if isinstance(salt_list, bytes):
            salt_list = salt_list.decode('utf-8', 'replace')
        return hostname in salt_list

    def check_port_used(self, host, port):
        """Return True if netstat on `host` shows a tcp listener matching `port`."""
        self.logger.info(
            "Checking if {port} on {host} is open with salt-client".format(
                host=host, port=port))
        results = self.salt_client.cmd(
            host, 'cmd.run',
            ['netstat -an | grep %s | grep tcp | grep -i listen' % port],
            expr_form='list')
        self.logger.debug("Salt return: {lsof}".format(lsof=results[host]))
        # Compare by value; identity against a string literal is
        # implementation-dependent.
        return results[host] != ''

    # TODO
    def check_for_existing_formation(self, formation_name):
        # If the user passed in an existing formation name lets append to it
        pass

    def get_docker_cluster(self):
        """Return the list of docker hosts from etcd, or None if unset."""
        cluster = self.etcd.get_key('docker_cluster')
        if cluster is None:
            return None
        return cluster.split(',')

    def get_load_balancer_cluster(self):
        """Return the list of nginx hosts from etcd, or None if unset."""
        cluster = self.etcd.get_key('nginx_cluster')
        if cluster is None:
            return None
        return cluster.split(',')

    def order_cluster_by_load(self, cluster_list):
        """Return Load entries for the hosts, sorted by fifteen minute load.

        Hosts whose /proc/loadavg output cannot be parsed are logged and
        left out. An empty or None `cluster_list` yields an empty list.
        """
        # Local import: the exception class may not be imported at module
        # level alongside the parser elements.
        from pyparsing import ParseException

        if not cluster_list:
            self.logger.error("No docker hosts were given to order by load")
            return []

        # Sample salt output
        # {'dlceph01.drwg.local': '0.27 0.16 0.15 1/1200 26234'}

        # define grammar
        point = Literal('.')
        number = Word(nums)
        floatnumber = Combine(number + point + number)
        float_list = OneOrMore(floatnumber)

        results = self.salt_client.cmd(
            ','.join(cluster_list), 'cmd.run', ['cat /proc/loadavg'],
            expr_form='list')
        load_list = []
        self.logger.debug("Salt load return: {load}".format(load=results))

        for host in results:
            host_load = results[host]
            try:
                match = float_list.parseString(host_load)
            except ParseException:
                # parseString raises on a mismatch rather than returning an
                # empty result.
                match = None
            if not match or len(match) < 3:
                self.logger.error("Could not parse host load output")
                continue
            one_min = match[0]
            five_min = match[1]
            fifteen_min = match[2]
            self.logger.debug(
                "Adding Load({host}, {one_min}, {five_min}, {fifteen_min}".format(
                    host=host, one_min=one_min, five_min=five_min,
                    fifteen_min=fifteen_min))
            load_list.append(Load(host, one_min, five_min, fifteen_min))

        # Sort the list by fifteen min load. The parsed values are strings,
        # so compare them numerically ('10.02' must sort after '9.50').
        load_list = sorted(
            load_list, key=lambda x: float(x.fifteen_min_load))
        for load in load_list:
            self.logger.debug("Sorted load list: " + str(load))

        return load_list

    # Return a list of formations the user owns
    def list_formations(self, username):
        """Return the formation names found under formations/<username>."""
        from pyparsing import ParseException

        formation_list = []
        formations = self.etcd.list_directory('formations/' + username)
        for formation in formations or []:
            try:
                parse_results = self.formation_parser.parseString(formation)
            except ParseException:
                parse_results = None
            if parse_results:
                formation_list.append(parse_results['formation_name'])
            else:
                self.logger.error("Could not parse the ETCD string")
        self.logger.info('Formation list {formations} for user {user}'.format(
            formations=formation_list, user=username))
        return formation_list

    # Load the formation and return a Formation object
    def load_formation_from_etcd(self, username, formation_name):
        """Build a Formation from the JSON stored for this user and name."""
        f = Formation(username, formation_name)
        # The stored value is decoded twice, i.e. it is expected to be a JSON
        # string that itself contains JSON.
        app_list = json.loads(json.loads(
            self.etcd.get_key('/formations/{username}/{formation_name}'.format(
                username=username, formation_name=formation_name))))
        for app in app_list:
            # If our host doesn't support swapping we're going to get some
            # garbage message in here
            container_id = app['container_id']
            if container_id and "WARNING" in container_id:
                container_id = self._strip_docker_warnings(container_id)
                app['container_id'] = container_id

            # Set volumes if needed
            volumes = None
            if app.get('volumes'):
                self.logger.info(
                    "Setting volumes to: " + ''.join(app['volumes']))
                volumes = app['volumes']

            # NOTE(review): create_containers passes ten arguments to add_app
            # (docker_image, then the volume list); this call passes nine, so
            # volumes sits in the ninth position -- confirm against
            # Formation.add_app.
            f.add_app(app['container_id'], app['hostname'], app['cpu_shares'],
                      app['ram'], app['port_list'], app['ssh_port'], 22,
                      app['host_server'], volumes)

        # Return fully parsed and populated formation object
        return f

    def save_formation_to_etcd(self, formation):
        """Store the formation under formations/<username>/<name> in etcd."""
        name = formation.name
        username = formation.username

        self.etcd.set_key('formations/{username}/{formation_name}'.format(
            username=username, formation_name=name), formation)

    # TODO write code to add new apps to load balancer
    def add_app_to_nginx(self, app):
        pass

    # TODO write code to add new apps to the load balancer
    def add_app_to_apache(self, app):
        pass

    def start_application(self, app):
        """Run 'docker run' for the app via salt and record its container id."""
        # NOTE(review): create_containers multiplies ram by 1024 * 1024
        # before it reaches this command, yet '-m' is given an 'm' suffix --
        # confirm the intended unit.
        docker_command = "docker run -c={cpu_shares} -d -i -t -h=\"{hostname}\" -m={ram}m "\
            "--name={hostname} {port_list} {volume_list} {image} /sbin/my_init -- bash"

        self.logger.info("Port list %s" % app.port_list)
        port_list = ' '.join('-p ' + port for port in app.port_list)

        # Only create this list if needed
        volume_list = ''
        if app.volume_list:
            volume_list = ' '.join('-v ' + vol for vol in app.volume_list)

        d = docker_command.format(
            cpu_shares=app.cpu_shares, hostname=app.hostname, ram=app.ram,
            image=app.docker_image, port_list=port_list,
            volume_list=volume_list)

        self.logger.info(
            "Starting up docker container on {host_server} with cmd: {docker_cmd}".format(
                host_server=app.host_server, docker_cmd=d))

        salt_process = self.salt_client.cmd(
            app.host_server, 'cmd.run', [d], expr_form='list')
        # An unresponsive host yields no entry at all.
        container_id = salt_process.get(app.host_server)
        if container_id:
            container_id = self._strip_docker_warnings(container_id)
        if container_id:
            # Docker only uses the first 12 chars to identify a container
            app.change_container_id(container_id[0:12])
        else:
            self.logger.error(
                "No container id returned by {host_server} for {hostname}".format(
                    host_server=app.host_server, hostname=app.hostname))

    def bootstrap_application(self, app):
        """Upload bootstrap.sh and start.sh over ssh and run start.sh.

        On an SSHException the container is stopped and removed.
        """
        # Log into the host with paramiko and run the salt bootstrap script
        host_server = self.fqdn_to_shortname(app.host_server)

        self.logger.info(
            "Bootstrapping {hostname} on server: {host_server} port: {port}".format(
                hostname=app.hostname, host_server=host_server,
                port=app.ssh_port))

        ssh = None
        transport = None
        try:
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(hostname=host_server, port=app.ssh_port,
                        username='******', password='******')

            transport = paramiko.Transport((host_server, app.ssh_port))
            transport.connect(username='******', password='******')
            sftp = paramiko.SFTPClient.from_transport(transport)
            sftp.put('bootstrap.sh', '/root/bootstrap.sh')
            sftp.put('start.sh', '/root/start.sh')

            ssh.exec_command("chmod +x /root/bootstrap.sh")
            ssh.exec_command("chmod +x /root/start.sh")
            stdin, stdout, stderr = ssh.exec_command("bash /root/start.sh")
            self.logger.debug(''.join(stdout.readlines()))
        except SSHException:
            self.logger.error("Failed to log into server.  Shutting it down and cleaning up the mess.")
            self.delete_container(app.host_server, app.container_id)
        finally:
            # Release both connections on every path; the transport was
            # previously never closed.
            for connection in (transport, ssh):
                if connection is not None:
                    connection.close()

    # Stops and deletes a container
    def delete_container(self, host_server, container_id):
        """Run 'docker stop' and then 'docker rm' for the container via salt."""
        results = self.salt_client.cmd(
            host_server, 'cmd.run',
            ['docker stop {container_id}'.format(container_id=container_id)],
            expr_form='list')
        # .get: a host that did not answer has no entry in the result.
        self.logger.debug("Salt return: {stop_cmd}".format(
            stop_cmd=results.get(host_server)))

        results = self.salt_client.cmd(
            host_server, 'cmd.run',
            ['docker rm {container_id}'.format(container_id=container_id)],
            expr_form='list')
        self.logger.debug("Salt return: {rm_cmd}".format(
            rm_cmd=results.get(host_server)))

    # Stops and deletes a formation.  Use with caution
    def delete_formation(self, user, formation_name):
        """Not implemented yet: only reports an unknown formation name."""
        formation_list = self.list_formations(user)
        if formation_name not in formation_list:
            self.logger.error("Formation name not found!")

    def list_containers(self, user, formation_name):
        pass

    def create_containers(self, user, number, formation_name,
                          cpu_shares, ram, port_list, hostname_scheme,
                          volume_list, docker_image, force_host_server=None):
        """Create `number` containers, start them and save the formation.

        Hosts are taken from the docker cluster ordered by load unless
        `force_host_server` is given. Requested host ports that are already
        listening are bumped upward until a free one is found. Exits the
        process with -1 when no docker host is available or when a
        generated hostname already has a salt key.
        """
        f = Formation(user, formation_name)
        # Convert ram to bytes from MB
        ram = ram * 1024 * 1024

        # Get the cluster machines on each creation
        cluster_list = self.get_docker_cluster()
        ordered_cluster = []
        if cluster_list:
            ordered_cluster = self.order_cluster_by_load(cluster_list)
        if not ordered_cluster:
            # Same failure convention as the salt key check below.
            self.logger.error("No docker hosts are available in the cluster")
            sys.exit(-1)
        circular_cluster_list = CircularList(ordered_cluster)

        # Loop for the requested amount of containers to be created
        for i in range(1, number + 1):
            # [{"host_port":ssh_host_port, "container_port":ssh_container_port}]
            ssh_host_port = 9022 + i
            ssh_container_port = 22
            host_server = circular_cluster_list[i].hostname
            hostname = '{hostname}{number}'.format(
                hostname=hostname_scheme,
                number=str(i).zfill(3))

            # First check if we can add this host to salt.  If not exit with -1
            if self.check_salt_key_used(hostname):
                self.logger.error('Salt key is already taken for {hostname}'.format(
                    hostname=hostname))
                sys.exit(-1)

            # We are being asked to overwrite this
            if force_host_server:
                host_server = force_host_server
            validated_ports = []

            while self.check_port_used(host_server, ssh_host_port):
                ssh_host_port = ssh_host_port + 1

            for port in port_list:
                self.logger.info("Checking if port {port} on {host} is in use".format(
                    port=port, host=host_server))
                if ':' in port:
                    ports = port.split(':')

                    # Only check if the host port is free.  The container port should be free
                    while self.check_port_used(host_server, ports[0]):
                        ports[0] = int(ports[0]) + 1

                    # Add this to the validated port list
                    validated_ports.append('{host_port}:{container_port}'.format(
                        host_port=str(ports[0]),
                        container_port=str(ports[1])))
                else:
                    while self.check_port_used(host_server, port):
                        port = int(port) + 1
                    validated_ports.append(str(port))

            self.logger.info(
                'Adding app to formation {formation_name}: {hostname} cpu_shares={cpu} '
                'ram={ram} ports={ports} host_server={host_server} docker_image={docker_image}'.format(
                    formation_name=formation_name, hostname=hostname,
                    cpu=cpu_shares, ram=ram, ports=validated_ports,
                    host_server=host_server, docker_image=docker_image))

            f.add_app(None, '{hostname}'.format(hostname=hostname),
                      cpu_shares, ram, validated_ports, ssh_host_port,
                      ssh_container_port, host_server, docker_image,
                      volume_list)

        # Lets get this party started
        for app in f.application_list:
            self.start_application(app)
            #self.logger.info("Sleeping 2 seconds while the container starts")
            #time.sleep(2)
            #self.bootstrap_application(app)

        self.logger.info("Saving the formation to ETCD")
        self.save_formation_to_etcd(f)
class VerifyFormations(object):
    """Verify that the applications of every formation stored in etcd are up.

    Container state is queried and changed through salt ``cmd.run`` calls on
    the application's host server; cron inside a container is checked over
    ssh.
    """

    def __init__(self, manager, logger):
        """Store the collaborators and create the salt and etcd clients.

        manager -- object providing load_formation_from_etcd,
                   start_application, bootstrap_application,
                   get_docker_cluster and order_cluster_by_load
        logger  -- logger used for all progress and error reporting
        """
        self.logger = logger
        self.salt_client = salt.client.LocalClient()
        self.manager = manager
        self.etcd = Etcd(logger)

    def start_verifying(self):
        """Load every formation from etcd and verify each of its apps."""
        # TODO Use background salt jobs
        # Ask salt to do these things for me and give me back an job_id
        # results = self.salt_client.cmd_async(host, 'cmd.run',
        #   ['netstat -an | grep %s | grep tcp | grep -i listen' % port],
        #   expr_form='list')
        #
        # salt-run jobs.lookup_jid <job id number>
        for formation in self._load_all_formations():
            for app in formation.application_list:
                self._verify_application(app)

    def _load_all_formations(self):
        """Return the formations found under the 'formations' etcd directory.

        Directory entries that do not look like
        /formations/<username>/<formation_name> are logged and skipped.
        """
        # Local import: the parser elements are used at module level, but the
        # exception class may not be imported there.
        from pyparsing import ParseException

        # Parse out the username and formation name
        # from the ETCD directory string
        formation_parser = Literal('/formations/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('username') + \
            Literal('/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('formation_name')

        formation_list = []
        user_list = self.etcd.list_directory('formations')
        for user in user_list or []:
            # list_directory can come back empty/None (the caller already
            # guards the top-level listing), so guard the per-user one too.
            formations = self.etcd.list_directory(user)
            for formation in formations or []:
                try:
                    parse_results = formation_parser.parseString(formation)
                except ParseException:
                    # parseString raises on a mismatch rather than returning
                    # an empty result.
                    parse_results = None
                if not parse_results:
                    self.logger.error("Could not parse the ETCD string")
                    continue
                formation_name = parse_results['formation_name']
                username = parse_results['username']
                self.logger.info(
                    'Attempting to load formation: {formation_name} '
                    'with username: {username}'.format(
                        formation_name=formation_name, username=username))
                formation_list.append(
                    self.manager.load_formation_from_etcd(
                        username, formation_name))
        return formation_list

    def _verify_application(self, app):
        """Check one app's container via salt and repair it if needed."""
        # Check to make sure it's up and running
        self.logger.info(
            "Running verification on app: "
            "{app_name}".format(app_name=app.hostname))
        self.logger.info(
            '{server} docker ps | grep {container_id}'.format(
                server=app.host_server, container_id=app.container_id))
        results = self.salt_client.cmd(
            app.host_server, 'cmd.run',
            ['docker ps | grep {container_id}'.format(
                container_id=app.container_id)],
            expr_form='list')

        if not results:
            # Nothing came back from the host at all: move the container
            self.logger.error(
                "Call out to server {server} failed. Moving it"
                .format(server=app.host_server))
            self.move_application(app)
            return

        self.logger.debug(
            "Salt return: {docker_results}".format(
                docker_results=results[app.host_server]))
        if results[app.host_server] == "":
            self.logger.error(
                "App {app} is not running!".format(app=app.hostname))
            # Start the app back up and run start.sh on there
            self.start_application(app)
        else:
            self.logger.info(
                "App {app} is running.  Checking if "
                "cron is running also".format(app=app.hostname))
            # Log in with ssh and check if cron is up and running,
            # bringing it back up if needed
            self.logger.info(
                "Sleeping 2 seconds while the container starts")
            time.sleep(2)
            self.check_running_application(app)

    # Start an application that isn't running
    def start_application(self, app):
        """Run 'docker start' for the app and react to the outcome.

        A missing container is recreated and bootstrapped through the
        manager; a failed start or a silent host moves the app elsewhere;
        otherwise the running container is checked for cron.
        """
        self.logger.info(
            "Starting app {app} with docker id: {app_id} up".format(
                app=app.hostname, app_id=app.container_id))
        results = self.salt_client.cmd(
            app.host_server, 'cmd.run',
            ['docker start {container_id}'.format(
                container_id=app.container_id)],
            expr_form='list')
        self.logger.debug(results)

        if not results:
            # Move the container to another host, this host is messed up
            self.logger.error(
                "Failed to start {container_id} on host {host}".format(
                    container_id=app.container_id, host=app.host_server))
            self.move_application(app)
            return

        output = results[app.host_server]
        if "Error: No such container" in output:
            # We need to recreate the container
            self.logger.error("Container is missing on the host!. "
                              "Trying to recreate")
            self.manager.start_application(app)
            self.logger.info(
                "Sleeping 2 seconds while the container starts")
            time.sleep(2)
            self.manager.bootstrap_application(app)
        elif "Error: start: No such container:" in output:
            # Seems the container already exists but won't start.  Bug?
            self.logger.error("Container failed to start")
            self.move_application(app)
        else:
            self.logger.info(
                "Waiting 2 seconds for docker to start the container")
            time.sleep(2)
            self.check_running_application(app)

    @staticmethod
    def _next_host(hostnames, current):
        """Return the host after `current` in `hostnames`, wrapping around.

        Returns the first host when `current` is not listed and None when
        `hostnames` is empty.
        """
        if not hostnames:
            return None
        if current in hostnames:
            following = (hostnames.index(current) + 1) % len(hostnames)
            return hostnames[following]
        return hostnames[0]

    # Move an application to another host
    def move_application(self, app):
        """Point the app at the next docker host by load and start it there.

        NOTE(review): start_application calls back into this method on
        failure, so a cluster where every host fails recurses without bound.
        """
        old_host = app.host_server
        cluster_list = self.manager.get_docker_cluster()
        if not cluster_list:
            self.logger.error(
                "No docker cluster available to move app {app_name} to".format(
                    app_name=app.hostname))
            return

        # The ordered entries expose .hostname; compare hostnames with
        # hostnames so the current host is actually found and skipped.
        ordered = self.manager.order_cluster_by_load(cluster_list)
        new_host = self._next_host(
            [load.hostname for load in ordered], old_host)
        if new_host is None:
            self.logger.error(
                "No docker host reported its load; cannot move app "
                "{app_name}".format(app_name=app.hostname))
            return

        app.host_server = new_host
        self.logger.info(
            "Moving app {app_name} from {old_host} to {new_host}".format(
                app_name=app.hostname, old_host=old_host,
                new_host=app.host_server))
        self.logger.info("Bootstrapping the application on the new host")
        self.start_application(app)

    # Log into the application via ssh and check everything
    def check_running_application(self, app):
        """SSH into the app and run /root/start.sh if cron is not running."""
        # TODO
        # Use the docker top command to see if cron is running instead of using ssh
        ssh = paramiko.SSHClient()
        try:
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            # Move this user/pass into a config file
            self.logger.info('SSHing into host {hostname}:{port}'.format(
                hostname=app.host_server, port=app.ssh_port))
            ssh.connect(hostname=app.host_server, port=app.ssh_port,
                        username='******', password='******')
            # Is cron running?
            # If not run start.sh
            stdin, stdout, stderr = ssh.exec_command("pgrep cron")
            output = stdout.readlines()
            self.logger.debug(output)

            if len(output) == 0:
                # cron isn't running
                self.logger.info("Cron is not running.  Starting it back up")
                stdin, stdout, stderr = ssh.exec_command("/root/start.sh")
            else:
                self.logger.info("Cron is running.")
        except SSHException:
            self.logger.error("Failed to log into server.")
        finally:
            # Close on every path so a failed command does not leak the
            # connection.
            ssh.close()
class Manager(object):
    """
    A manager to orchestrate the creation and deletion of container clusters
    """

    def __init__(self, logger):
        """Create the salt and etcd clients and the formation-key parser."""
        self.salt_client = salt.client.LocalClient()
        self.etcd = Etcd(logger)
        self.logger = logger
        # Parse out the username and formation name
        # from the ETCD directory string
        self.formation_parser = Literal('/formations/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('username') + \
            Literal('/') + \
            Word(srange("[0-9a-zA-Z_-]")).setResultsName('formation_name')

    @staticmethod
    def _strip_docker_warnings(output):
        """Return `output` without lines starting with WARNING, stripped.

        Docker may print kernel capability warnings ahead of the container
        id; the wording differs between versions, so whole warning lines are
        dropped instead of matching one exact message.
        """
        kept = [line for line in output.splitlines()
                if not line.lstrip().startswith('WARNING')]
        return '\n'.join(kept).strip()

    def fqdn_to_shortname(self, fqdn):
        """Return the part of `fqdn` before the first dot."""
        if '.' in fqdn:
            return fqdn.split('.')[0]
        return fqdn

    def check_salt_key_used(self, hostname):
        """Return True if `hostname` appears in the output of 'salt-key'."""
        self.logger.info(
            "Checking if the key for {host} is already used".format(
                host=hostname))
        s = subprocess.Popen(['salt-key'], stdout=subprocess.PIPE)
        salt_list = s.communicate()[0]
        # communicate() yields bytes on Python 3; decode so the substring
        # test against a text hostname works on both major versions.
        if isinstance(salt_list, bytes):
            salt_list = salt_list.decode('utf-8', 'replace')
        return hostname in salt_list

    def check_port_used(self, host, port):
        """Return True if netstat on `host` shows a tcp listener matching `port`."""
        self.logger.info(
            "Checking if {port} on {host} is open with salt-client".format(
                host=host, port=port))
        results = self.salt_client.cmd(
            host, 'cmd.run',
            ['netstat -an | grep %s | grep tcp | grep -i listen' % port],
            expr_form='list')
        self.logger.debug("Salt return: {lsof}".format(lsof=results[host]))
        # Compare by value; identity against a string literal is
        # implementation-dependent.
        return results[host] != ''

    # TODO
    def check_for_existing_formation(self, formation_name):
        # If the user passed in an existing formation name lets append to it
        pass

    def get_docker_cluster(self):
        """Return the list of docker hosts from etcd, or None if unset."""
        cluster = self.etcd.get_key('docker_cluster')
        if cluster is None:
            return None
        return cluster.split(',')

    def get_load_balancer_cluster(self):
        """Return the list of nginx hosts from etcd, or None if unset."""
        cluster = self.etcd.get_key('nginx_cluster')
        if cluster is None:
            return None
        return cluster.split(',')

    def order_cluster_by_load(self, cluster_list):
        """Return Load entries for the hosts, sorted by fifteen minute load.

        Hosts whose /proc/loadavg output cannot be parsed are logged and
        left out. An empty or None `cluster_list` yields an empty list.
        """
        # Local import: the exception class may not be imported at module
        # level alongside the parser elements.
        from pyparsing import ParseException

        if not cluster_list:
            self.logger.error("No docker hosts were given to order by load")
            return []

        # Sample salt output
        # {'dlceph01.drwg.local': '0.27 0.16 0.15 1/1200 26234'}

        # define grammar
        point = Literal('.')
        number = Word(nums)
        floatnumber = Combine(number + point + number)
        float_list = OneOrMore(floatnumber)

        results = self.salt_client.cmd(
            ','.join(cluster_list), 'cmd.run', ['cat /proc/loadavg'],
            expr_form='list')
        load_list = []
        self.logger.debug("Salt load return: {load}".format(load=results))

        for host in results:
            host_load = results[host]
            try:
                match = float_list.parseString(host_load)
            except ParseException:
                # parseString raises on a mismatch rather than returning an
                # empty result.
                match = None
            if not match or len(match) < 3:
                self.logger.error("Could not parse host load output")
                continue
            one_min = match[0]
            five_min = match[1]
            fifteen_min = match[2]
            self.logger.debug(
                "Adding Load({host}, {one_min}, {five_min}, {fifteen_min}".format(
                    host=host, one_min=one_min, five_min=five_min,
                    fifteen_min=fifteen_min))
            load_list.append(Load(host, one_min, five_min, fifteen_min))

        # Sort the list by fifteen min load. The parsed values are strings,
        # so compare them numerically ('10.02' must sort after '9.50').
        load_list = sorted(
            load_list, key=lambda x: float(x.fifteen_min_load))
        for load in load_list:
            self.logger.debug("Sorted load list: " + str(load))

        return load_list

    # Return a list of formations the user owns
    def list_formations(self, username):
        """Return the formation names found under formations/<username>."""
        from pyparsing import ParseException

        formation_list = []
        formations = self.etcd.list_directory('formations/' + username)
        for formation in formations or []:
            try:
                parse_results = self.formation_parser.parseString(formation)
            except ParseException:
                parse_results = None
            if parse_results:
                formation_list.append(parse_results['formation_name'])
            else:
                self.logger.error("Could not parse the ETCD string")
        self.logger.info('Formation list {formations} for user {user}'.format(
            formations=formation_list, user=username))
        return formation_list

    # Load the formation and return a Formation object
    def load_formation_from_etcd(self, username, formation_name):
        """Build a Formation from the JSON stored for this user and name."""
        f = Formation(username, formation_name)
        # The stored value is decoded twice, i.e. it is expected to be a JSON
        # string that itself contains JSON.
        app_list = json.loads(json.loads(
            self.etcd.get_key('/formations/{username}/{formation_name}'.format(
                username=username, formation_name=formation_name))))
        for app in app_list:
            # If our host doesn't support swapping we're going to get some
            # garbage message in here
            container_id = app['container_id']
            if container_id and "WARNING" in container_id:
                container_id = self._strip_docker_warnings(container_id)
                app['container_id'] = container_id

            # Set volumes if needed
            volumes = None
            if app.get('volumes'):
                self.logger.info(
                    "Setting volumes to: " + ''.join(app['volumes']))
                volumes = app['volumes']

            # NOTE(review): create_containers passes ten arguments to add_app
            # (docker_image, then the volume list); this call passes nine, so
            # volumes sits in the ninth position -- confirm against
            # Formation.add_app.
            f.add_app(app['container_id'], app['hostname'], app['cpu_shares'],
                      app['ram'], app['port_list'], app['ssh_port'], 22,
                      app['host_server'], volumes)

        # Return fully parsed and populated formation object
        return f

    def save_formation_to_etcd(self, formation):
        """Store the formation under formations/<username>/<name> in etcd."""
        name = formation.name
        username = formation.username

        self.etcd.set_key('formations/{username}/{formation_name}'.format(
            username=username, formation_name=name), formation)

    # TODO write code to add new apps to load balancer
    def add_app_to_nginx(self, app):
        pass

    # TODO write code to add new apps to the load balancer
    def add_app_to_apache(self, app):
        pass

    def start_application(self, app):
        """Run 'docker run' for the app via salt and record its container id."""
        # NOTE(review): create_containers multiplies ram by 1024 * 1024
        # before it reaches this command, yet '-m' is given an 'm' suffix --
        # confirm the intended unit.
        docker_command = "docker run -c={cpu_shares} -d -i -t -h=\"{hostname}\" -m={ram}m "\
            "--name={hostname} {port_list} {volume_list} {image} /sbin/my_init -- bash"

        self.logger.info("Port list %s" % app.port_list)
        port_list = ' '.join('-p ' + port for port in app.port_list)

        # Only create this list if needed
        volume_list = ''
        if app.volume_list:
            volume_list = ' '.join('-v ' + vol for vol in app.volume_list)

        d = docker_command.format(
            cpu_shares=app.cpu_shares, hostname=app.hostname, ram=app.ram,
            image=app.docker_image, port_list=port_list,
            volume_list=volume_list)

        self.logger.info(
            "Starting up docker container on {host_server} with cmd: {docker_cmd}".format(
                host_server=app.host_server, docker_cmd=d))

        salt_process = self.salt_client.cmd(
            app.host_server, 'cmd.run', [d], expr_form='list')
        # An unresponsive host yields no entry at all.
        container_id = salt_process.get(app.host_server)
        if container_id:
            container_id = self._strip_docker_warnings(container_id)
        if container_id:
            # Docker only uses the first 12 chars to identify a container
            app.change_container_id(container_id[0:12])
        else:
            self.logger.error(
                "No container id returned by {host_server} for {hostname}".format(
                    host_server=app.host_server, hostname=app.hostname))

    def bootstrap_application(self, app):
        """Upload bootstrap.sh and start.sh over ssh and run start.sh.

        On an SSHException the container is stopped and removed.
        """
        # Log into the host with paramiko and run the salt bootstrap script
        host_server = self.fqdn_to_shortname(app.host_server)

        self.logger.info(
            "Bootstrapping {hostname} on server: {host_server} port: {port}".format(
                hostname=app.hostname, host_server=host_server,
                port=app.ssh_port))

        ssh = None
        transport = None
        try:
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(hostname=host_server, port=app.ssh_port,
                        username='******', password='******')

            transport = paramiko.Transport((host_server, app.ssh_port))
            transport.connect(username='******', password='******')
            sftp = paramiko.SFTPClient.from_transport(transport)
            sftp.put('bootstrap.sh', '/root/bootstrap.sh')
            sftp.put('start.sh', '/root/start.sh')

            ssh.exec_command("chmod +x /root/bootstrap.sh")
            ssh.exec_command("chmod +x /root/start.sh")
            stdin, stdout, stderr = ssh.exec_command("bash /root/start.sh")
            self.logger.debug(''.join(stdout.readlines()))
        except SSHException:
            self.logger.error("Failed to log into server.  Shutting it down and cleaning up the mess.")
            self.delete_container(app.host_server, app.container_id)
        finally:
            # Release both connections on every path; the transport was
            # previously never closed.
            for connection in (transport, ssh):
                if connection is not None:
                    connection.close()

    # Stops and deletes a container
    def delete_container(self, host_server, container_id):
        """Run 'docker stop' and then 'docker rm' for the container via salt."""
        results = self.salt_client.cmd(
            host_server, 'cmd.run',
            ['docker stop {container_id}'.format(container_id=container_id)],
            expr_form='list')
        # .get: a host that did not answer has no entry in the result.
        self.logger.debug("Salt return: {stop_cmd}".format(
            stop_cmd=results.get(host_server)))

        results = self.salt_client.cmd(
            host_server, 'cmd.run',
            ['docker rm {container_id}'.format(container_id=container_id)],
            expr_form='list')
        self.logger.debug("Salt return: {rm_cmd}".format(
            rm_cmd=results.get(host_server)))

    # Stops and deletes a formation.  Use with caution
    def delete_formation(self, user, formation_name):
        """Not implemented yet: only reports an unknown formation name."""
        formation_list = self.list_formations(user)
        if formation_name not in formation_list:
            self.logger.error("Formation name not found!")

    def list_containers(self, user, formation_name):
        pass

    def create_containers(self, user, number, formation_name,
                          cpu_shares, ram, port_list, hostname_scheme,
                          volume_list, docker_image, force_host_server=None):
        """Create `number` containers, start them and save the formation.

        Hosts are taken from the docker cluster ordered by load unless
        `force_host_server` is given. Requested host ports that are already
        listening are bumped upward until a free one is found. Exits the
        process with -1 when no docker host is available or when a
        generated hostname already has a salt key.
        """
        f = Formation(user, formation_name)
        # Convert ram to bytes from MB
        ram = ram * 1024 * 1024

        # Get the cluster machines on each creation
        cluster_list = self.get_docker_cluster()
        ordered_cluster = []
        if cluster_list:
            ordered_cluster = self.order_cluster_by_load(cluster_list)
        if not ordered_cluster:
            # Same failure convention as the salt key check below.
            self.logger.error("No docker hosts are available in the cluster")
            sys.exit(-1)
        circular_cluster_list = CircularList(ordered_cluster)

        # Loop for the requested amount of containers to be created
        for i in range(1, number + 1):
            # [{"host_port":ssh_host_port, "container_port":ssh_container_port}]
            ssh_host_port = 9022 + i
            ssh_container_port = 22
            host_server = circular_cluster_list[i].hostname
            hostname = '{hostname}{number}'.format(
                hostname=hostname_scheme,
                number=str(i).zfill(3))

            # First check if we can add this host to salt.  If not exit with -1
            if self.check_salt_key_used(hostname):
                self.logger.error('Salt key is already taken for {hostname}'.format(
                    hostname=hostname))
                sys.exit(-1)

            # We are being asked to overwrite this
            if force_host_server:
                host_server = force_host_server
            validated_ports = []

            while self.check_port_used(host_server, ssh_host_port):
                ssh_host_port = ssh_host_port + 1

            for port in port_list:
                self.logger.info("Checking if port {port} on {host} is in use".format(
                    port=port, host=host_server))
                if ':' in port:
                    ports = port.split(':')

                    # Only check if the host port is free.  The container port should be free
                    while self.check_port_used(host_server, ports[0]):
                        ports[0] = int(ports[0]) + 1

                    # Add this to the validated port list
                    validated_ports.append('{host_port}:{container_port}'.format(
                        host_port=str(ports[0]),
                        container_port=str(ports[1])))
                else:
                    while self.check_port_used(host_server, port):
                        port = int(port) + 1
                    validated_ports.append(str(port))

            self.logger.info(
                'Adding app to formation {formation_name}: {hostname} cpu_shares={cpu} '
                'ram={ram} ports={ports} host_server={host_server} docker_image={docker_image}'.format(
                    formation_name=formation_name, hostname=hostname,
                    cpu=cpu_shares, ram=ram, ports=validated_ports,
                    host_server=host_server, docker_image=docker_image))

            f.add_app(None, '{hostname}'.format(hostname=hostname),
                      cpu_shares, ram, validated_ports, ssh_host_port,
                      ssh_container_port, host_server, docker_image,
                      volume_list)

        # Lets get this party started
        for app in f.application_list:
            self.start_application(app)
            #self.logger.info("Sleeping 2 seconds while the container starts")
            #time.sleep(2)
            #self.bootstrap_application(app)

        self.logger.info("Saving the formation to ETCD")
        self.save_formation_to_etcd(f)