def scp(files, cluster, host):
    cmd = "scp -F cli/ssh_config-%s %s %s:%s" % (cluster, ' '.join(files), host, '/tmp')
    CONSOLE.debug(cmd)
    ret_val = subprocess_to_log.call(cmd.split(' '), LOG, host)
    if ret_val != 0:
        raise Exception("Error transferring files to new host %s via SCP. See debug log (%s) for details." % (host, LOG_FILE_NAME))
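# Hypothetical usage of the helper above, assuming write_ssh_config has already
# generated cli/ssh_config-<cluster> for this cluster; the file name and host
# address are illustrative only:
#
#   scp(['bootstrap-scripts/package-install.sh'], 'mycluster', '10.0.0.10')
#
# A non-zero scp exit status raises, directing the operator to the debug log.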
def prepare_bastion(self, bastion_ip):
    # Configure the bastion with the PNDA mirror and install nc on it.
    # nc is required for relaying commands through the bastion
    # to do anything on the other instances.
    files_to_scp = ['cli/pnda_env_%s.sh' % self._cluster,
                    'bootstrap-scripts/package-install.sh']
    cmds_to_run = ['source /tmp/pnda_env_%s.sh' % self._cluster,
                   'export PNDA_CLUSTER=%s' % self._cluster,
                   'export PNDA_FLAVOR=%s' % self._flavor,
                   'sudo chmod a+x /tmp/package-install.sh',
                   'sudo -E /tmp/package-install.sh',
                   'sudo yum install -y nc']
    nc_scp_cmd = "scp -i %s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null %s %s@%s:%s" % (
        self._keyfile, ' '.join(files_to_scp), self._pnda_env['infrastructure']['OS_USER'], bastion_ip, '/tmp')
    CONSOLE.debug(nc_scp_cmd)
    ret_val = subprocess_to_log.call(nc_scp_cmd.split(' '), LOG, log_id=bastion_ip)
    if ret_val != 0:
        raise Exception("Error transferring files to new host %s via SCP. See debug log (%s) for details." % (bastion_ip, LOG_FILE_NAME))
    nc_ssh_cmd = 'ssh -i %s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null %s@%s' % (
        self._keyfile, self._pnda_env['infrastructure']['OS_USER'], bastion_ip)
    nc_install_cmd = nc_ssh_cmd.split(' ')
    nc_install_cmd.append(' && '.join(cmds_to_run))
    CONSOLE.debug(nc_install_cmd)
    ret_val = subprocess_to_log.call(nc_install_cmd, LOG, log_id=bastion_ip)
    if ret_val != 0:
        raise Exception("Error running ssh commands on host %s. See debug log (%s) for details." % (bastion_ip, LOG_FILE_NAME))
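# Every helper in this module delegates to subprocess_to_log.call. Its real
# implementation lives elsewhere in the repo; the sketch below only illustrates
# the interface these call sites assume (argv list in, exit code out, each
# output line mirrored to the logger tagged with log_id, an optional output
# destination, and optional scan_for_errors regexes that force a failure).
# Every name and behaviour here is an assumption, not the actual implementation.
def _subprocess_to_log_call_sketch(cmd, logger, log_id=None, output=None, scan_for_errors=None):
    import re
    import subprocess
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    matched_error = False
    for raw_line in iter(proc.stdout.readline, b''):
        line = raw_line.decode('utf-8', 'replace').rstrip()
        logger.debug('%s: %s', log_id, line)
        if output is not None:
            with open(output, 'a') as out_file:  # assumed: output is a file path
                out_file.write(line + '\n')
        if scan_for_errors and any(re.match(pattern, line) for pattern in scan_for_errors):
            matched_error = True  # a matching line counts as failure even on exit code 0
    proc.stdout.close()
    returncode = proc.wait()
    return returncode if returncode != 0 else (1 if matched_error else 0)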
def scp(self, files, host):
    host = self._subsitute_host_if_bastion(host)
    cmd = "scp -F cli/ssh_config-%s %s %s:%s" % (self._cluster, ' '.join(files), host, '/tmp')
    CONSOLE.debug(cmd)
    ret_val = subprocess_to_log.call(cmd.split(' '), LOG, log_id=host)
    CONSOLE.debug("scp result: %s", ret_val)
    if ret_val != 0:
        raise Exception("Error transferring files to new host %s via SCP. See debug log (%s) for details." % (host, LOG_FILE_NAME))
def scp(files, bastion_ip, host, username, pem_key):
    cmd = "scp -i %s %s %s@%s:%s" % (pem_key, ' '.join(files), username, host, '/tmp')
    if bastion_ip:
        cmd = "scp -F ssh_config-metrics %s %s:%s" % (' '.join(files), host, '/tmp')
    CONSOLE.debug(cmd)
    ret_val = subprocess_to_log.call(cmd.split(' '), LOG, host)
    if ret_val != 0:
        raise Exception("Error transferring files to new host %s via SCP. See debug log (%s) for details." % (host, LOG_FILE_NAME))
def ssh(cmds, cluster, host):
    cmd = "ssh -F cli/ssh_config-%s %s" % (cluster, host)
    parts = cmd.split(' ')
    parts.append(';'.join(cmds))
    CONSOLE.debug(json.dumps(parts))
    ret_val = subprocess_to_log.call(parts, LOG, host, scan_for_errors=['lost connection'])
    if ret_val != 0:
        raise Exception("Error running ssh commands on host %s. See debug log (%s) for details." % (host, LOG_FILE_NAME))
def ssh(cmds, bastion_ip, host, username, pem_key):
    cmd = "ssh -i %s %s@%s" % (pem_key, username, host)
    if bastion_ip:
        cmd = "ssh -F ssh_config-metrics %s" % host
    parts = cmd.split(' ')
    parts.append(';'.join(cmds))
    CONSOLE.debug(json.dumps(parts))
    ret_val = subprocess_to_log.call(parts, LOG, host,
                                     scan_for_errors=[r'lost connection', r'\s*Failed:\s*[1-9].*'])
    if ret_val != 0:
        raise Exception("Error running ssh commands on host %s. See debug log (%s) for details." % (host, LOG_FILE_NAME))
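# Hypothetical invocations of the two call paths above -- direct when no
# bastion address is given, or via the pre-written ssh_config-metrics file
# when one is. The addresses, user and key file are examples only:
#
#   ssh(['uptime'], None, '52.1.2.3', 'cloud-user', 'mykey.pem')         # direct
#   ssh(['uptime'], '52.4.5.6', '10.0.0.10', 'cloud-user', 'mykey.pem')  # relayed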
def ssh(self, cmds, host, output=None):
    host = self._subsitute_host_if_bastion(host)
    cmd = "ssh -F cli/ssh_config-%s %s" % (self._cluster, host)
    parts = cmd.split(' ')
    parts.append(' && '.join(cmds))
    CONSOLE.debug(json.dumps(parts))
    ret_val = subprocess_to_log.call(parts, LOG, log_id=host, output=output,
                                     scan_for_errors=[r'lost connection', r'\s*Failed:\s*[1-9].*', r'\s*Failures:'])
    CONSOLE.debug("ssh result: %s", ret_val)
    if ret_val != 0:
        raise Exception("Error running ssh commands on host %s. See debug log (%s) for details." % (host, LOG_FILE_NAME))
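# A hypothetical call sequence for the class-based variants above, from inside
# the owning class; the script name and host address are illustrative:
#
#   self.scp(['bootstrap-scripts/base.sh'], '10.0.0.12')
#   self.ssh(['sudo chmod a+x /tmp/base.sh', 'sudo /tmp/base.sh'], '10.0.0.12')
#
# Note the commands are joined with ' && ', so the chain stops at the first
# failing command, unlike the ';'-joined module-level variants.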
def create(template_data, cluster, flavor, keyname, no_config_check, dry_run, branch, existing_machines_def_file):
    init_runfile(cluster)
    bastion = NODE_CONFIG['bastion-instance']
    to_runfile({'cmdline': sys.argv,
                'bastion': bastion,
                'saltmaster': NODE_CONFIG['salt-master-instance']})
    keyfile = '%s.pem' % keyname

    cf_parameters = None
    if existing_machines_def_file is None:
        region = PNDA_ENV['ec2_access']['AWS_REGION']
        awsAvailabilityZone = PNDA_ENV['ec2_access']['AWS_AVAILABILITY_ZONE']
        cf_parameters = [('keyName', keyname),
                         ('pndaCluster', cluster),
                         ('awsAvailabilityZone', awsAvailabilityZone)]
        for parameter in PNDA_ENV['cloud_formation_parameters']:
            cf_parameters.append((parameter, PNDA_ENV['cloud_formation_parameters'][parameter]))
        if not no_config_check:
            check_config(keyname, keyfile, existing_machines_def_file)
        save_cf_resources('create_%s' % MILLI_TIME(), cluster, cf_parameters, template_data)

    if dry_run:
        CONSOLE.info('Dry run mode completed')
        sys.exit(0)

    if existing_machines_def_file is None:
        CONSOLE.info('Creating Cloud Formation stack')
        conn = boto.cloudformation.connect_to_region(region)
        stack_status = 'CREATING'
        conn.create_stack(cluster, template_body=template_data, parameters=cf_parameters)
        while stack_status in ['CREATE_IN_PROGRESS', 'CREATING']:
            time.sleep(5)
            CONSOLE.info('Stack is: ' + stack_status)
            stacks = retry(conn.describe_stacks, cluster)
            if len(stacks) > 0:
                stack_status = stacks[0].stack_status
        if stack_status != 'CREATE_COMPLETE':
            CONSOLE.error('Stack did not come up, status is: ' + stack_status)
            sys.exit(1)

    instance_map = get_instance_map(cluster, existing_machines_def_file)
    bastion_ip = ''
    bastion_name = cluster + '-' + bastion
    if bastion_name in instance_map:
        bastion_ip = instance_map[bastion_name]['ip_address']
    write_ssh_config(cluster, bastion_ip, PNDA_ENV['ec2_access']['OS_USER'], os.path.abspath(keyfile))
    CONSOLE.debug('The PNDA console will come up on: http://%s',
                  instance_map[cluster + '-' + NODE_CONFIG['console-instance']]['private_ip_address'])

    if bastion_ip:
        # Keep retrying the nc install until the bastion accepts connections
        attempts_per_host = 150
        while attempts_per_host > 0:
            try:
                nc_ssh_cmd = 'ssh -i %s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null %s@%s' % (
                    keyfile, PNDA_ENV['ec2_access']['OS_USER'], bastion_ip)
                nc_install_cmd = nc_ssh_cmd.split(' ')
                nc_install_cmd.append('sudo yum install -y nc || echo nc already installed')
                ret_val = subprocess_to_log.call(nc_install_cmd, LOG, bastion_ip)
                if ret_val != 0:
                    raise Exception("Error running ssh commands on host %s. See debug log (%s) for details." % (bastion_ip, LOG_FILE_NAME))
                break
            except Exception:
                CONSOLE.info('Still waiting for connectivity to bastion. See debug log (%s) for details.', LOG_FILE_NAME)
                LOG.info(traceback.format_exc())
                attempts_per_host -= 1
                time.sleep(2)

    wait_for_host_connectivity([instance_map[h]['private_ip_address'] for h in instance_map], cluster)
    CONSOLE.info('Bootstrapping saltmaster. Expect this to take a few minutes, check the debug log for progress (%s).', LOG_FILE_NAME)
    saltmaster = instance_map[cluster + '-' + NODE_CONFIG['salt-master-instance']]
    saltmaster_ip = saltmaster['private_ip_address']

    platform_salt_tarball = None
    if 'PLATFORM_SALT_LOCAL' in PNDA_ENV['platform_salt']:
        local_salt_path = PNDA_ENV['platform_salt']['PLATFORM_SALT_LOCAL']
        platform_salt_tarball = '%s.tmp' % str(uuid.uuid1())
        with tarfile.open(platform_salt_tarball, mode='w:gz') as archive:
            archive.add(local_salt_path, arcname='platform-salt', recursive=True)
        scp([platform_salt_tarball], cluster, saltmaster_ip)
        os.remove(platform_salt_tarball)

    bootstrap_threads = []
    bootstrap_errors = Queue.Queue()
    bootstrap(saltmaster, saltmaster_ip, cluster, flavor, branch, platform_salt_tarball, bootstrap_errors)
    process_errors(bootstrap_errors)

    CONSOLE.info('Bootstrapping other instances. Expect this to take a few minutes, check the debug log for progress (%s).', LOG_FILE_NAME)
    for key, instance in instance_map.iteritems():
        if '-' + NODE_CONFIG['salt-master-instance'] not in key:
            thread = Thread(target=bootstrap,
                            args=[instance, saltmaster_ip, cluster, flavor, branch, platform_salt_tarball, bootstrap_errors])
            bootstrap_threads.append(thread)
    for thread in bootstrap_threads:
        thread.start()
        time.sleep(2)
    for thread in bootstrap_threads:
        thread.join()
    process_errors(bootstrap_errors)

    time.sleep(30)
    CONSOLE.info('Running salt to install software. Expect this to take 45 minutes or more, check the debug log for progress (%s).', LOG_FILE_NAME)
    bastion = NODE_CONFIG['bastion-instance']
    ssh(['(sudo salt -v --log-level=debug --timeout=120 --state-output=mixed "*" state.highstate queue=True 2>&1) | tee -a pnda-salt.log; %s' % THROW_BASH_ERROR,
         '(sudo CLUSTER=%s salt-run --log-level=debug state.orchestrate orchestrate.pnda 2>&1) | tee -a pnda-salt.log; %s' % (cluster, THROW_BASH_ERROR),
         '(sudo salt "*%s" state.sls hostsfile 2>&1) | tee -a pnda-salt.log; %s' % (bastion, THROW_BASH_ERROR)],
        cluster, saltmaster_ip)

    time.sleep(60)
    wait_for_host_connectivity([instance_map[h]['private_ip_address'] for h in instance_map], cluster)
    return instance_map[cluster + '-' + NODE_CONFIG['console-instance']]['private_ip_address']
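# The -F cli/ssh_config-<cluster> files consumed by scp() and ssh() are written
# by write_ssh_config. A plausible sketch of such a file is shown below; the
# exact contents are an assumption, but a ProxyCommand relaying through the
# bastion with nc would explain why create() installs nc on the bastion first.
#
#   Host *
#       User cloud-user
#       IdentityFile /path/to/keyname.pem
#       StrictHostKeyChecking no
#       UserKnownHostsFile /dev/null
#       ProxyCommand ssh -i /path/to/keyname.pem cloud-user@<bastion_ip> nc %h %p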
def _call(self, cmd):
    ret_val = subprocess_to_log.call(cmd.split(' '), LOG)
    if ret_val != 0:
        raise Exception("Error running %s" % cmd)
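# Hypothetical usage: self._call('rm -f /tmp/platform-salt.tmp'). Because the
# command is tokenised with a naive str.split(' '), arguments containing
# spaces are not supported by this helper.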