def load_config(config_path: str) -> dict:
    """Read and parse a YAML launch config file.

    Args:
        config_path: path to the YAML config file.

    Returns:
        dict: the parsed config.

    Raises:
        util.LauncherError: 'MissingConfig' when the file does not exist,
            'InvalidYaml' when the file is not valid YAML.
    """
    try:
        with open(config_path) as config_file:
            return yaml.safe_load(config_file)
    # The two exception types are disjoint, so ordering does not matter.
    except FileNotFoundError as ex:
        raise util.LauncherError('MissingConfig', None) from ex
    except yaml.YAMLError as ex:
        raise util.LauncherError('InvalidYaml', None) from ex
def get_launcher(config, env=None):
    """Return the launcher instance for a validated launch config dict.

    Dispatches on the (platform, provider) pair in the config.

    Raises:
        util.LauncherError: 'UnsupportedAction' for unrecognized combinations.
    """
    platform = config['platform']
    provider = config['provider']
    # (platform, provider) -> launcher class
    launcher_classes = {
        ('aws', 'aws'): aws.DcosCloudformationLauncher,
        ('aws', 'onprem'): aws.OnPremLauncher,
        ('aws', 'terraform'): terraform.AwsLauncher,
        ('azure', 'azure'): arm.AzureResourceGroupLauncher,
        ('azure', 'dcos-engine'): dcos_engine.DcosEngineLauncher,
        ('azure', 'terraform'): terraform.AzureLauncher,
        ('gcp', 'terraform'): terraform.GcpLauncher,
        ('gcp', 'onprem'): gcp.OnPremLauncher,
    }
    launcher_class = launcher_classes.get((platform, provider))
    if launcher_class is None:
        raise util.LauncherError('UnsupportedAction', 'Launch platform not supported: {}'.format(platform))
    return launcher_class(config, env=env)
def create(self):
    """Provision helper resources and deploy the CloudFormation stack.

    Checks whether the key helper or zen helper are enabled, provisions
    resources accordingly (tracking which resources were created), and then
    attempts to deploy the template.

    Note: both key helper and zen helper mutate self.config to inject the
    appropriate template parameters for the generated resources.

    Returns:
        dict: self.config, updated with 'stack_id' and 'temp_resources'.

    Raises:
        util.LauncherError: 'ProviderError' when stack creation fails; any
            temporary resources created by the helpers are deleted first.
    """
    # key_helper runs first, matching the order the helpers mutate the config.
    temp_resources = {**self.key_helper(), **self.zen_helper()}
    try:
        stack = self.boto_wrapper.create_stack(
            self.config['deployment_name'],
            self.config['template_parameters'],
            template_url=self.config.get('template_url'),
            template_body=self.config.get('template_body'),
            disable_rollback=self.config['disable_rollback'],
            tags=self.config.get('tags'))
    except Exception as ex:
        # Roll back helper-created resources before surfacing the failure.
        self.delete_temp_resources(temp_resources)
        raise util.LauncherError('ProviderError', None) from ex
    self.config['stack_id'] = stack.stack_id
    self.config['temp_resources'] = temp_resources
    return self.config
def deployment(self):
    """ Builds a BareClusterDeployment instance with self.config, but only
    returns it successfully if the corresponding real deployment (active
    machines) exists and doesn't contain any errors.
    """
    try:
        real_deployment = gce.BareClusterDeployment(
            self.gce_wrapper,
            self.config['deployment_name'],
            self.config['gce_zone'])
        info = real_deployment.get_info()
    except HttpError as e:
        # A 404 means the deployment simply isn't there; translate it.
        if e.resp.status == 404:
            raise util.LauncherError('DeploymentNotFound',
                                     "The deployment you are trying to access doesn't exist") from e
        raise
    errors = info['operation'].get('error')
    if errors:
        raise util.LauncherError('DeploymentContainsErrors', str(errors))
    return real_deployment
def get_bare_cluster_launcher(self):
    """Return the bare cluster launcher matching self.config['platform'].

    Raises:
        util.LauncherError: 'PlatformNotSupported' for any other platform.
    """
    # platform name -> module providing a BareClusterLauncher
    platform_modules = {'aws': aws, 'gcp': gcp}
    module = platform_modules.get(self.config['platform'])
    if module is None:
        raise util.LauncherError(
            'PlatformNotSupported',
            'Platform currently not supported for onprem: {}'.format(self.config['platform']))
    return module.BareClusterLauncher(self.config, env=self.env)
def _validate_genconf_scripts(genconf_dir, dcos_config): for script in ('ip_detect', 'ip_detect_public', 'fault_domain_detect'): filename_key = script + '_filename' if filename_key in dcos_config: if os.path.isabs(dcos_config[filename_key]): continue if not os.path.exists( os.path.join(genconf_dir, dcos_config[filename_key])): raise util.LauncherError( 'FileNotFoundError', '{} script must exist in the genconf dir ({})'.format( dcos_config[filename_key], genconf_dir))
def key_helper(self):
    """Create an EC2 key pair for this deployment when key_helper is enabled.

    The key pair is named after the CloudFormation stack; the resulting
    private key is stored in the config and KeyName is injected into the
    template parameters.

    Returns:
        dict: {'key_name': name} for the created key, or {} when disabled.

    Raises:
        util.LauncherError: 'KeyHelperError' if KeyName was already set by
            the user while key_helper is true.
    """
    if not self.config['key_helper']:
        return {}
    if 'KeyName' in self.config['template_parameters']:
        raise util.LauncherError('KeyHelperError', 'KeyName cannot be set in '
                                 'template_parameters when key_helper is true')
    stack_key_name = self.config['deployment_name']
    self.config['ssh_private_key'] = self.boto_wrapper.create_key_pair(stack_key_name)
    self.config['template_parameters']['KeyName'] = stack_key_name
    return {'key_name': stack_key_name}
def __init__(self, config: dict, env=None):
    """Initialize the launcher with a GCE API wrapper built from env credentials.

    GCE_CREDENTIALS (raw JSON) takes precedence over GCE_CREDENTIALS_PATH
    (path to a JSON credentials file).

    Raises:
        util.LauncherError: 'MissingParameter' when neither variable is set.
    """
    if env is None:
        env = os.environ.copy()
    json_credentials = env.get('GCE_CREDENTIALS')
    if json_credentials is None:
        credentials_path = env.get('GCE_CREDENTIALS_PATH')
        if credentials_path is None:
            raise util.LauncherError(
                'MissingParameter',
                'Either GCE_CREDENTIALS or GCE_CREDENTIALS_PATH must be set in env'
            )
        json_credentials = util.read_file(credentials_path)
    self.gce_wrapper = gce.GceWrapper(json.loads(json_credentials))
    self.config = config
def deduce_image_project(doc: dict):
    """Guess the GCP image project from the 'source_image' value in doc.

    Returns:
        str: one of centos-cloud, rhel-cloud, ubuntu-os-cloud, coreos-cloud
            or debian-cloud.

    Raises:
        util.LauncherError: 'ValidationError' when no known distro substring
            appears in the source image name.
    """
    src_image = doc['source_image']
    # Ordered (substrings, project) pairs; first match wins.
    distro_projects = (
        (('centos', 'cent-os'), 'centos-cloud'),
        (('rhel',), 'rhel-cloud'),
        (('ubuntu',), 'ubuntu-os-cloud'),
        (('coreos',), 'coreos-cloud'),
        (('debian',), 'debian-cloud'),
    )
    for substrings, project in distro_projects:
        if any(marker in src_image for marker in substrings):
            return project
    raise util.LauncherError('ValidationError', """Couldn't deduce the image project for your source image. Please specify the "image_project" parameter in your dcos-launch config. Possible values are: centos-cloud, rhel-cloud, ubuntu-os-cloud, coreos-cloud and debian-cloud.""")
def get_credentials(env) -> tuple:
    """Fetch GCE JSON credentials from the given environment mapping.

    GCE_CREDENTIALS (raw JSON) takes precedence over
    GOOGLE_APPLICATION_CREDENTIALS (path to a JSON file).

    Returns:
        tuple: (json_credentials, path) where path is the credentials file
            path when GOOGLE_APPLICATION_CREDENTIALS was used, else None.

    Raises:
        util.LauncherError: 'MissingParameter' when neither variable is set.
    """
    if env is None:
        env = os.environ.copy()
    if 'GCE_CREDENTIALS' in env:
        return env['GCE_CREDENTIALS'], None
    if 'GOOGLE_APPLICATION_CREDENTIALS' in env:
        credentials_path = env['GOOGLE_APPLICATION_CREDENTIALS']
        return util.read_file(credentials_path), credentials_path
    raise util.LauncherError(
        'MissingParameter',
        'Either GCE_CREDENTIALS or GOOGLE_APPLICATION_CREDENTIALS must be set in env'
    )
def test(self, args: list, env_dict: dict, test_host: str = None, test_port: int = 22, details: dict = None) -> int:
    # TODO only reason this exists is because private IPs are not yet returned from describe(), which are required
    # by the parent test() function
    """ Connects to master host with SSH and then run the internal integration test

    Args:
        args: a list of args that will follow the py.test command
        env_dict: the env to use during the test
        test_host: host to SSH into; defaults to the first master's public IP
            from details. May carry a ':port' suffix, which overrides test_port
        test_port: SSH port to use (default 22)
        details: cluster description dict; defaults to self.describe()
    """
    if args is None:
        args = list()
    # Refuse to run without SSH credentials in the config.
    if self.config[
            'ssh_private_key'] == util.NO_TEST_FLAG or 'ssh_user' not in self.config:
        raise util.LauncherError(
            'MissingInput',
            'DC/OS Launch is missing sufficient SSH info to run tests!')
    if details is None:
        details = self.describe()
    # check for any environment variables that contain spaces and
    # single-quote them so they survive the remote shell command line
    env_dict = {
        e: "'{}'".format(env_dict[e]) if ' ' in env_dict[e] else env_dict[e]
        for e in env_dict
    }
    env_string = ' '.join(
        ['{}={}'.format(e, env_dict[e]) for e in env_dict])
    arg_string = ' '.join(args)
    # To support 1.8.9-EE, try using the dcos-integration-test-ee folder if possible
    pytest_cmd = """ "source /opt/mesosphere/environment.export && cd `find /opt/mesosphere/active/ -name dcos-integration-test* | sort | tail -n 1` && {env} py.test {args}" """.format(env=env_string, args=arg_string)
    log.info('Running integration test...')
    if test_host is None:
        test_host = details['masters'][0]['public_ip']
    # NOTE(review): split(':') leaves test_port as a str here, while the
    # default is the int 22 — confirm util.try_to_output_unbuffered accepts both.
    if ':' in test_host:
        test_host, test_port = test_host.split(':')
    # NOTE(review): DCOS_DNS_ADDRESS is set after env_string was already
    # built, so it is NOT part of the remote py.test environment — confirm
    # whether this mutation of the caller's env_dict is intentional.
    env_dict['DCOS_DNS_ADDRESS'] = 'http://' + test_host
    return util.try_to_output_unbuffered(self.config, test_host, pytest_cmd, test_port)
def get_completed_onprem_config(self) -> typing.Tuple[dict, str]:
    """ Will fill in the necessary and/or recommended sections of the config file, including:
    * starting a ZK backend if left undefined
    * filling in the master_list for a static exhibitor backend
    * adding ip-detect script
    * adding ip-detect-public script
    * adding fault domain real or logical script

    Returns: config dict, path to genconf directory
    """
    cluster = self.get_onprem_cluster()
    onprem_config = self.config['dcos_config']
    # Every install will need a cluster-local bootstrap URL with this installer
    onprem_config[
        'bootstrap_url'] = 'http://' + cluster.bootstrap_host.private_ip
    # It's possible that the masters may live outside the cluster being installed
    if 'master_list' not in onprem_config:
        onprem_config['master_list'] = json.dumps(
            [h.private_ip for h in cluster.masters])
    # If the user wanted to use exhibitor (ZK) as the backend, start a
    # bootstrap-host ZK container unless hosts were already provided.
    exhibitor_backend = onprem_config.get('exhibitor_storage_backend')
    if exhibitor_backend == 'zookeeper' and 'exhibitor_zk_hosts' not in onprem_config:
        zk_service_name = 'dcos-bootstrap-zk'
        with self.get_bootstrap_ssh_client().tunnel(
                cluster.bootstrap_host.public_ip) as t:
            if not platforms_onprem.get_docker_service_status(
                    t, zk_service_name):
                platforms_onprem.start_docker_service(
                    t, zk_service_name, [
                        '--publish=2181:2181', '--publish=2888:2888',
                        '--publish=3888:3888', 'jplock/zookeeper'
                    ])
        onprem_config[
            'exhibitor_zk_hosts'] = cluster.bootstrap_host.private_ip + ':2181'
    # NOTE(review): this branch looks unreachable — 'master_list' is always
    # populated above when absent, so the condition can never hold; confirm.
    elif exhibitor_backend == 'static' and 'master_list' not in onprem_config:
        onprem_config['master_list'] = [
            h.private_ip for h in cluster.masters
        ]
    # Check for ip-detect configuration and inject defaults if not present
    # set the simple default IP detect script if not provided
    genconf_dir = self.config['genconf_dir']
    if not os.path.exists(genconf_dir):
        os.makedirs(genconf_dir)
    for script in ('ip_detect', 'ip_detect_public', 'fault_domain_detect'):
        script_hyphen = script.replace('_', '-')
        default_path_local = os.path.join(genconf_dir, script_hyphen)
        filename_key = script + '_filename'
        if script == 'fault_domain_detect':
            if 'fault_domain_helper' in self.config:
                # fault_domain_helper is enabled; use it
                with open(default_path_local, 'w') as f:
                    f.write(self._fault_domain_helper())
                continue
            elif onprem_config.get('fault_domain_enabled') == 'false':
                # fault domain is explicitly disabled, so inject nothing.
                # if disabled implicitly, the injected default won't be used
                continue
        if filename_key in onprem_config:
            # User supplied a script path: must live under genconf so the
            # installer can see it.
            if not onprem_config[script + '_filename'].startswith('genconf'):
                raise util.LauncherError(
                    'ValidationError',
                    'Only files in the genconf folder will be copied')
            local_script_path = onprem_config[filename_key].replace(
                'genconf', genconf_dir)
            if not os.path.exists(local_script_path):
                raise util.LauncherError(
                    'MissingInput',
                    '{} script must exist at the given path ({})'.format(
                        script_hyphen, local_script_path))
        elif script + '_contents' in onprem_config:
            # Inline script contents provided; nothing to copy.
            continue
        elif os.path.exists(default_path_local):
            # A script is already staged in the genconf dir; keep it.
            continue
        elif script == 'ip_detect_public':
            # this is a special case where DC/OS does not expect this field by default
            onprem_config[filename_key] = os.path.join(
                'genconf', script_hyphen)
        # use a sensible default
        # NOTE(review): the first branch above does not `continue`, so a
        # user-supplied script at the default path gets overwritten by the
        # packaged default here — confirm this is intended.
        shutil.copyfile(
            pkg_resources.resource_filename(
                dcos_launch.__name__, script_hyphen + '/{}.sh'.format(self.config['platform'])),
            default_path_local)
    with open(os.path.join(genconf_dir, 'config.yaml'), 'w') as f:
        f.write(yaml.safe_dump(onprem_config))
    log.debug('Generated cluster configuration: {}'.format(onprem_config))
    return onprem_config, genconf_dir
def _raise_errors(validator: LaunchValidator):
    """Raise a LauncherError describing all errors collected by validator."""
    raise util.LauncherError(
        'ValidationError', _expand_error_dict(validator.errors))
def stack(self):
    """Fetch the CloudFormation stack identified by self.config['stack_id'].

    Raises:
        util.LauncherError: 'StackNotFound' when the stack cannot be fetched.
    """
    try:
        return aws.fetch_stack(self.config['stack_id'], self.boto_wrapper)
    except Exception as fetch_error:
        raise util.LauncherError('StackNotFound', None) from fetch_error
def create(self):
    """Create a cluster with terraform-dcos.

    Flow: refuse to reuse an existing init_dir, install Terraform if it is
    not on the PATH, optionally run the key helper, render the cluster
    profile (.tfvars-style) file from the YAML config, then run
    'terraform init' and 'terraform apply'.

    Any exception raised during creation is stored on self.create_exception
    instead of propagating, so callers can inspect it afterwards.

    Returns:
        dict: self.config (possibly mutated by key_helper).
    """
    try:
        if os.path.exists(self.init_dir):
            raise util.LauncherError(
                'ClusterAlreadyExists',
                "Either the cluster you are trying to create is "
                "already running or the init_dir you specified in your"
                " config is already used by another active cluster.")
        os.makedirs(self.init_dir)
        # Check if Terraform is installed by running 'terraform version'. If that fails, install Terraform.
        try:
            subprocess.run([self.terraform_cmd(), 'version'], check=True,
                           stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except FileNotFoundError:
            log.info('No Terraform installation detected. Terraform is now being installed.')
            self._install_terraform()
        if self.config['key_helper']:
            self.key_helper()
        else:
            log.warning(
                'WARNING: Since you did not set "key_helper: true" in your config, make sure your '
                'ssh-agent is running i.e. "eval `ssh-agent -s`" and that you have added your private key '
                'to it i.e. "ssh-add /path/to/key.pem". ssh-agent usage is specific to terraform, not '
                'dcos-launch.')
        repo = 'terraform-dcos-enterprise' if self.config['dcos-enterprise'] else 'terraform-dcos'
        version = self.config['terraform_dcos_enterprise_version'] if self.config['dcos-enterprise'] else \
            self.config['terraform_dcos_version']
        module = 'github.com/dcos/{}?ref={}/{}'.format(repo, version, self.config['platform'])
        # Converting our YAML config to the required format. You can find an example of that format in the
        # "Advance YAML Configuration" section here:
        # https://github.com/mesosphere/terraform-dcos-enterprise/tree/master/aws
        with open(self.cluster_profile_path, 'w') as file:
            for k, v in self.config['terraform_config'].items():
                file.write(k + ' = ')
                # BUG FIX: the original checked type(k) (the key, always a str),
                # so dict values were never emitted as heredocs and ended up as
                # their Python repr inside quotes. Check the value instead.
                if isinstance(v, dict):
                    file.write('<<EOF\n{}\nEOF\n'.format(yaml.dump(v)))
                else:
                    file.write('"{}"\n'.format(v))
        subprocess.run(
            [self.terraform_cmd(), 'init', '-from-module', module],
            cwd=self.init_dir, check=True, stderr=subprocess.STDOUT)
        self._init_dir_gpu_setup()
        subprocess.run(
            [self.terraform_cmd(), 'apply', '-auto-approve', '-var-file', self.cluster_profile_path],
            cwd=self.init_dir, check=True, stderr=subprocess.STDOUT, env=os.environ)
    except Exception as e:
        # Deliberately swallowed: callers examine create_exception later.
        self.create_exception = e
    return self.config