def get_raw_data(self, job_binary, **kwargs):
    self._validate_job_binary_url(job_binary)
    proxy_configs = kwargs.pop('proxy_configs', None)
    with_context = kwargs.pop('with_context', False)
    if not with_context:
        conn_kwargs = {}
        if proxy_configs:
            conn_kwargs.update(
                username=proxy_configs.get('proxy_username'),
                password=key_manager.get_secret(
                    proxy_configs.get('proxy_password')),
                trust_id=proxy_configs.get('proxy_trust_id'))
        else:
            conn_kwargs.update(
                username=job_binary.extra.get('user'),
                password=key_manager.get_secret(
                    job_binary.extra.get('password')))
        conn = sw.client(**conn_kwargs)
    else:
        conn = sw.client_from_token()
    raw = self._get_raw_data(job_binary, conn)
    return raw
def _upload_wrapper_xml(self, where, job_dir, job_configs):
    xml_name = 'spark.xml'
    proxy_configs = job_configs.get('proxy_configs')
    configs = {}
    cfgs = job_configs.get('configs', {})
    if proxy_configs:
        configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
            'proxy_username')
        configs[sw.HADOOP_SWIFT_PASSWORD] = key_manager.get_secret(
            proxy_configs.get('proxy_password'))
        configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
            'proxy_trust_id')
        configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
    else:
        targets = [sw.HADOOP_SWIFT_USERNAME]
        configs = {k: cfgs[k] for k in targets if k in cfgs}
        if sw.HADOOP_SWIFT_PASSWORD in cfgs:
            configs[sw.HADOOP_SWIFT_PASSWORD] = (
                key_manager.get_secret(cfgs[sw.HADOOP_SWIFT_PASSWORD])
            )

    for s3_cfg_key in s3_common.S3_DS_CONFIGS:
        if s3_cfg_key in cfgs:
            if s3_cfg_key == s3_common.S3_SECRET_KEY_CONFIG:
                configs[s3_cfg_key] = (
                    key_manager.get_secret(cfgs[s3_cfg_key])
                )
            else:
                configs[s3_cfg_key] = cfgs[s3_cfg_key]

    content = xmlutils.create_hadoop_xml(configs)
    with remote.get_remote(where) as r:
        dst = os.path.join(job_dir, xml_name)
        r.write_file_to(dst, content)
    return xml_name
def get_raw_data(job_binary, proxy_configs=None):
    conn_kwargs = {}
    if proxy_configs:
        conn_kwargs.update(
            username=proxy_configs.get('proxy_username'),
            password=key_manager.get_secret(
                proxy_configs.get('proxy_password')),
            trust_id=proxy_configs.get('proxy_trust_id'))
    else:
        conn_kwargs.update(
            username=job_binary.extra.get('user'),
            password=key_manager.get_secret(
                job_binary.extra.get('password')))
    conn = sw.client(**conn_kwargs)
    return _get_raw_data(job_binary, conn)
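# Illustrative sketch (not part of the original module): the shape of the
# proxy_configs mapping that the retrieval helpers above expect. The key
# names come from the lookups in the code; the literal values below are
# hypothetical. Note that 'proxy_password' holds a key-manager secret id
# rather than cleartext, which is why every consumer resolves it through
# key_manager.get_secret().
example_proxy_configs = {
    'proxy_username': 'job-proxy-user',        # hypothetical value
    'proxy_password': 'a1b2c3d4-secret-id',    # key-manager id (hypothetical)
    'proxy_trust_id': 'trust-9f8e7d',          # hypothetical value
}
# raw = get_raw_data(job_binary, proxy_configs=example_proxy_configs)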
def get_oozie_password(cluster):
    cluster = conductor.cluster_get(context.ctx(), cluster)
    extra = cluster.extra.to_dict()
    if 'oozie_pass_id' not in extra:
        extra['oozie_pass_id'] = u.generate_random_password()
        conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
    return castellan.get_secret(extra['oozie_pass_id'])
def get_hive_password(cluster):
    cluster = conductor.cluster_get(context.ctx(), cluster)
    extra = cluster.extra.to_dict()
    if 'hive_pass_id' not in extra:
        extra['hive_pass_id'] = u.generate_random_password()
        conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
    return castellan.get_secret(extra['hive_pass_id'])
def _get_s3_client(extra):
    sess = botocore.session.get_session()
    secretkey = key_manager.get_secret(extra['secretkey'])
    return sess.create_client(
        's3',
        # TODO(jfreud): investigate region name support
        region_name=None,
        # TODO(jfreud): investigate configurable verify
        verify=False,
        endpoint_url=extra['endpoint'],
        aws_access_key_id=extra['accesskey'],
        aws_secret_access_key=secretkey)
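# Hedged usage sketch: the botocore S3 client returned above can fetch an
# object directly. The 'extra' dict mirrors the credential keys assumed by
# _get_s3_client ('accesskey', 'secretkey', 'endpoint'); all literal values
# are placeholders, and 'secretkey' is a key-manager id resolved at call
# time.
example_extra = {
    'accesskey': 'AKIA...',                          # placeholder
    'secretkey': 'secret-key-manager-id',            # resolved via key_manager
    'endpoint': 'https://object-store.example.com',  # placeholder endpoint
}
# client = _get_s3_client(example_extra)
# obj = client.get_object(Bucket='my-bucket', Key='jobs/wordcount.jar')
# data = obj['Body'].read()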
def get_configs(self, input_data, output_data, proxy_configs=None):
    configs = {}

    if proxy_configs:
        configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
            'proxy_username')
        configs[sw.HADOOP_SWIFT_PASSWORD] = key_manager.get_secret(
            proxy_configs.get('proxy_password'))
        configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
            'proxy_trust_id')
        configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
        return configs

    for src in (input_data, output_data):
        if src.type == "swift" and hasattr(src, "credentials"):
            if "user" in src.credentials:
                configs[sw.HADOOP_SWIFT_USERNAME] = src.credentials['user']
            if "password" in src.credentials:
                configs[sw.HADOOP_SWIFT_PASSWORD] = (
                    key_manager.get_secret(src.credentials['password']))
            break

    for src in (input_data, output_data):
        if src.type == "s3" and hasattr(src, "credentials"):
            if "accesskey" in src.credentials:
                configs[s3_common.S3_ACCESS_KEY_CONFIG] = (
                    src.credentials['accesskey'])
            if "secretkey" in src.credentials:
                configs[s3_common.S3_SECRET_KEY_CONFIG] = (
                    key_manager.get_secret(src.credentials['secretkey']))
            if "endpoint" in src.credentials:
                configs[s3_common.S3_ENDPOINT_CONFIG] = (
                    src.credentials['endpoint'])
            if "bucket_in_path" in src.credentials:
                configs[s3_common.S3_BUCKET_IN_PATH_CONFIG] = (
                    src.credentials['bucket_in_path'])
            if "ssl" in src.credentials:
                configs[s3_common.S3_SSL_CONFIG] = (
                    src.credentials['ssl'])
            break

    return configs
def get_configs(self, input_data, output_data, proxy_configs=None):
    configs = {}

    if proxy_configs:
        configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
            'proxy_username')
        configs[sw.HADOOP_SWIFT_PASSWORD] = key_manager.get_secret(
            proxy_configs.get('proxy_password'))
        configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
            'proxy_trust_id')
        configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
        return configs

    for src in (input_data, output_data):
        if src.type == "swift" and hasattr(src, "credentials"):
            if "user" in src.credentials:
                configs[sw.HADOOP_SWIFT_USERNAME] = src.credentials['user']
            if "password" in src.credentials:
                configs[sw.HADOOP_SWIFT_PASSWORD] = (
                    key_manager.get_secret(src.credentials['password']))
            break

    return configs
def get_configs(self, input_data, output_data, proxy_configs=None):
    configs = {}

    if proxy_configs:
        configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
            'proxy_username')
        configs[sw.HADOOP_SWIFT_PASSWORD] = key_manager.get_secret(
            proxy_configs.get('proxy_password'))
        configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
            'proxy_trust_id')
        configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
        return configs

    for src in (input_data, output_data):
        if src.type == "swift" and hasattr(src, "credentials"):
            if "user" in src.credentials:
                configs[sw.HADOOP_SWIFT_USERNAME] = src.credentials['user']
            if "password" in src.credentials:
                configs[sw.HADOOP_SWIFT_PASSWORD] = (
                    key_manager.get_secret(src.credentials['password']))
            break

    for src in (input_data, output_data):
        if src.type == "s3" and hasattr(src, "credentials"):
            if "accesskey" in src.credentials:
                configs[s3_common.S3_ACCESS_KEY_CONFIG] = (
                    src.credentials['accesskey'])
            if "secretkey" in src.credentials:
                configs[s3_common.S3_SECRET_KEY_CONFIG] = (
                    key_manager.get_secret(src.credentials['secretkey']))
            if "endpoint" in src.credentials:
                configs[s3_common.S3_ENDPOINT_CONFIG] = (
                    src.credentials['endpoint'])
            if "bucket_in_path" in src.credentials:
                configs[s3_common.S3_BUCKET_IN_PATH_CONFIG] = (
                    src.credentials['bucket_in_path'])
            if "ssl" in src.credentials:
                configs[s3_common.S3_SSL_CONFIG] = (src.credentials['ssl'])
            break

    return configs
def get_oozie_password(cluster):
    cluster = conductor.cluster_get(context.ctx(), cluster)
    extra = cluster.extra.to_dict()
    if 'oozie_pass_id' not in extra:
        des = cluster.description
        password = ""
        if len(des) > 5 and des[-6:] == "backup":
            password = _get_oozie_password(cluster)
        if password == "":
            password = u.generate_random_password()
        extra['oozie_pass_id'] = password
        conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
    return castellan.get_secret(extra['oozie_pass_id'])
def get_server_password(cluster):
    if using_existing_kdc(cluster):
        return get_admin_password(cluster)
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, cluster)
    extra = cluster.extra.to_dict() if cluster.extra else {}
    passwd_key = 'admin-passwd-kdc'
    if passwd_key not in extra:
        passwd = _get_short_uuid()
        key_id = key_manager.store_secret(passwd, ctx)
        extra[passwd_key] = key_id
        cluster = conductor.cluster_update(ctx, cluster, {'extra': extra})
    passwd = key_manager.get_secret(extra.get(passwd_key), ctx)
    return passwd
def get_configs(self, proxy_configs=None):
    configs = {}
    if proxy_configs:
        configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
            'proxy_username')
        configs[sw.HADOOP_SWIFT_PASSWORD] = key_manager.get_secret(
            proxy_configs.get('proxy_password'))
        configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
            'proxy_trust_id')
        configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
        return configs
    return configs
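# Hedged illustration of the proxy path above: the returned dict maps the
# sw.HADOOP_SWIFT_* config names to the proxy credentials, with the password
# already resolved from the key manager. Literal values are placeholders.
#
# {
#     sw.HADOOP_SWIFT_USERNAME: 'job-proxy-user',
#     sw.HADOOP_SWIFT_PASSWORD: '<cleartext resolved by key_manager>',
#     sw.HADOOP_SWIFT_TRUST_ID: 'trust-9f8e7d',
#     sw.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name,
# }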
def delete_proxy_user_for_cluster(cluster):
    '''Delete a proxy user based on a Cluster

    :param cluster: The cluster model with proxy user information

    '''
    proxy_configs = cluster.cluster_configs.get('proxy_configs')
    if proxy_configs is not None:
        proxy_username = proxy_configs.get('proxy_username')
        proxy_trust_id = proxy_configs.get('proxy_trust_id')
        proxy_user = k.auth_for_proxy(
            proxy_username,
            key_manager.get_secret(proxy_configs.get('proxy_password')),
            proxy_trust_id)
        t.delete_trust(proxy_user, proxy_trust_id)
        proxy_user_delete(proxy_username)
        key_manager.delete_secret(proxy_configs.get('proxy_password'))
        update = {'cluster_configs': cluster.cluster_configs.to_dict()}
        del update['cluster_configs']['proxy_configs']
        conductor.cluster_update(context.ctx(), cluster, update)
def delete_proxy_user_for_cluster(cluster):
    '''Delete a proxy user based on a Cluster

    :param cluster: The cluster model with proxy user information

    '''
    proxy_configs = cluster.cluster_configs.get('proxy_configs')
    if proxy_configs is not None:
        proxy_username = proxy_configs.get('proxy_username')
        proxy_trust_id = proxy_configs.get('proxy_trust_id')
        proxy_user = k.auth_for_proxy(proxy_username,
                                      key_manager.get_secret(
                                          proxy_configs.get(
                                              'proxy_password')),
                                      proxy_trust_id)
        t.delete_trust(proxy_user, proxy_trust_id)
        proxy_user_delete(proxy_username)
        key_manager.delete_secret(proxy_configs.get('proxy_password'))
        update = {'cluster_configs': cluster.cluster_configs.to_dict()}
        del update['cluster_configs']['proxy_configs']
        conductor.cluster_update(context.ctx(), cluster, update)
def delete_proxy_user_for_job_execution(job_execution):
    '''Delete a proxy user based on a JobExecution

    :param job_execution: The job execution with proxy user information
    :returns: An updated job_configs dictionary or None

    '''
    proxy_configs = job_execution.job_configs.get('proxy_configs')
    if proxy_configs is not None:
        proxy_username = proxy_configs.get('proxy_username')
        proxy_trust_id = proxy_configs.get('proxy_trust_id')
        proxy_user = k.auth_for_proxy(
            proxy_username,
            key_manager.get_secret(proxy_configs.get('proxy_password')),
            proxy_trust_id)
        t.delete_trust(proxy_user, proxy_trust_id)
        proxy_user_delete(proxy_username)
        key_manager.delete_secret(proxy_configs.get('proxy_password'))
        update = job_execution.job_configs.to_dict()
        del update['proxy_configs']
        return update
    return None
def get_password(cluster, pw_name):
    """return a password for the named entry

    This function will return, or create and return, a password for the
    named entry. It will store the password in the key manager and use
    the ID in the database entry.

    :param cluster: The cluster record containing the password
    :param pw_name: The entry name associated with the password
    :returns: The cleartext password
    """
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, cluster.id)
    passwd = cluster.extra.get(pw_name) if cluster.extra else None
    if passwd:
        return key_manager.get_secret(passwd, ctx)

    passwd = six.text_type(uuid.uuid4())
    extra = cluster.extra.to_dict() if cluster.extra else {}
    extra[pw_name] = key_manager.store_secret(passwd, ctx)
    cluster = conductor.cluster_update(ctx, cluster, {'extra': extra})
    return passwd
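# Hedged usage sketch: get_password() is idempotent per (cluster, pw_name).
# The first call generates a cleartext password, stores it in the key
# manager, and records the returned secret id in cluster.extra; later calls
# resolve that id back to cleartext. The entry name below is hypothetical.
#
# db_password = get_password(cluster, 'service_db_password')
# db_password_again = get_password(cluster, 'service_db_password')
# assert db_password == db_password_again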
def delete_proxy_user_for_job_execution(job_execution):
    '''Delete a proxy user based on a JobExecution

    :param job_execution: The job execution with proxy user information
    :returns: An updated job_configs dictionary or None

    '''
    proxy_configs = job_execution.job_configs.get('proxy_configs')
    if proxy_configs is not None:
        proxy_username = proxy_configs.get('proxy_username')
        proxy_trust_id = proxy_configs.get('proxy_trust_id')
        proxy_user = k.auth_for_proxy(proxy_username,
                                      key_manager.get_secret(
                                          proxy_configs.get(
                                              'proxy_password')),
                                      proxy_trust_id)
        t.delete_trust(proxy_user, proxy_trust_id)
        proxy_user_delete(proxy_username)
        key_manager.delete_secret(proxy_configs.get('proxy_password'))
        update = job_execution.job_configs.to_dict()
        del update['proxy_configs']
        return update
    return None
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {'mapreduce.framework.name': 'yarn'}
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs(cluster))
        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']
        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_pass = u.get_hive_password(cluster)
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
                'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                    'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                    'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': hive_pass,
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: key_manager.get_secret(
                    proxy_configs['proxy_password']),
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
def get_secret(id, ctx=None, **kwargs):
    return castellan_utils.get_secret(id, ctx=ctx)
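# Hedged sketch of the round trip this wrapper implies, using only function
# names that appear in the snippets above: store_secret() returns an id that
# is persisted (for example in cluster.extra), get_secret() resolves that id
# back to cleartext, and delete_secret() removes it when the credential is
# retired. The 'ctx' handling mirrors get_secret() above.
#
#   secret_id = key_manager.store_secret(passwd, ctx)   # persist cleartext
#   cleartext = key_manager.get_secret(secret_id, ctx)  # resolve id later
#   key_manager.delete_secret(secret_id)                # drop when done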
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())
        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']
        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
                'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                    'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                    'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: key_manager.get_secret(
                    proxy_configs['proxy_password']),
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs