def test_load_plugin_config_objects(self):
    pls = self.plugin_spec
    pls.service_file_name_map = pls._load_service_file_name_map()
    pls.default_configs = pls._load_default_configs()
    actual = pls._load_plugin_config_objects()
    expected = [
        p.Config('k0', 'service_2', 'cluster',
                 default_value='default_value_0',
                 description='description_0'),
        p.Config('k1', 'service_2', 'node', config_type='int',
                 default_value=3, priority=1),
        p.Config('k2', 'service_2', 'cluster', config_type='bool',
                 is_optional=True),
        p.Config('k3', 'service_2', 'node', is_optional=True),
        p.Config('k4', 'general', 'cluster', is_optional=False)
    ]
    m_actual = map(lambda i: i.to_dict(), actual)
    m_expected = map(lambda i: i.to_dict(), expected)
    self.assertItemsEqual(m_expected, m_actual)
def _get_ha_params():
    enable_namenode_ha = provisioning.Config(
        name=common.NAMENODE_HA,
        applicable_target="general",
        scope="cluster",
        config_type="bool",
        default_value=False,
        is_optional=True,
        description=_("Enable NameNode HA"),
        priority=1)

    enable_resourcemanager_ha = provisioning.Config(
        name=common.RESOURCEMANAGER_HA,
        applicable_target="general",
        scope="cluster",
        config_type="bool",
        default_value=False,
        is_optional=True,
        description=_("Enable ResourceManager HA"),
        priority=1)

    enable_regionserver_ha = provisioning.Config(
        name=common.HBASE_REGIONSERVER_HA,
        applicable_target="general",
        scope="cluster",
        config_type="bool",
        default_value=False,
        is_optional=True,
        description=_("Enable HBase RegionServer HA"),
        priority=1)

    return [enable_namenode_ha,
            enable_resourcemanager_ha,
            enable_regionserver_ha]
def _get_repo_configs(self):
    ubuntu_base = p.Config(
        name="Ubuntu base repo",
        applicable_target="general",
        scope='cluster',
        priority=1,
        default_value="",
        description=_('Specifies Ubuntu MapR core repository.'))

    centos_base = p.Config(
        name="CentOS base repo",
        applicable_target="general",
        scope='cluster',
        priority=1,
        default_value="",
        description=_('Specifies CentOS MapR core repository.'))

    ubuntu_eco = p.Config(
        name="Ubuntu ecosystem repo",
        applicable_target="general",
        scope='cluster',
        priority=1,
        default_value="",
        description=_('Specifies Ubuntu MapR ecosystem repository.'))

    centos_eco = p.Config(
        name="CentOS ecosystem repo",
        applicable_target="general",
        scope='cluster',
        priority=1,
        default_value="",
        description=_('Specifies CentOS MapR ecosystem repository.'))

    return [ubuntu_base, centos_base, ubuntu_eco, centos_eco]
def _build_configs_and_plugin():
    c1 = p.Config('n-1', 'at-1', 'cluster')
    c2 = p.Config('n-2', 'at-2', 'cluster')
    c3 = p.Config('n-3', 'at-1', 'node')

    class TestPlugin(TestEmptyPlugin):
        def get_configs(self, hadoop_version):
            return [c1, c2, c3]

    return c1, c2, c3, TestPlugin()
def _initialise_configs():
    configs = []
    for service, config_lists in six.iteritems(XML_CONFS):
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "node",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    if cfg.default_value in ["true", "false"]:
                        cfg.config_type = "bool"
                        cfg.default_value = (cfg.default_value == 'true')
                    elif utils.is_int(cfg.default_value):
                        cfg.config_type = "int"
                        cfg.default_value = int(cfg.default_value)
                    if config['name'] in CLUSTER_WIDE_CONFS:
                        cfg.scope = 'cluster'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)

    for service, config_items in six.iteritems(ENV_CONFS):
        for name, param_format_str in six.iteritems(config_items):
            configs.append(
                p.Config(name, service, "node", default_value=1024,
                         priority=1, config_type="int"))

    for service, config_items in six.iteritems(SPARK_CONFS):
        for item in config_items['OPTIONS']:
            cfg = p.Config(name=item["name"],
                           description=item["description"],
                           default_value=item["default"],
                           applicable_target=service,
                           scope="cluster",
                           is_optional=True,
                           priority=item["priority"])
            configs.append(cfg)

    configs.append(DECOMMISSIONING_TIMEOUT)
    configs.append(ENABLE_SWIFT)
    configs.append(DATANODES_STARTUP_TIMEOUT)
    if CONF.enable_data_locality:
        configs.append(ENABLE_DATA_LOCALITY)
    return configs
def _create_config_obj(self, item, target='general', scope='cluster',
                       high_priority=False):
    def _prepare_value(value):
        if isinstance(value, str):
            return value.strip().lower()
        return value

    conf_name = _prepare_value(item.get('name', None))
    conf_value = _prepare_value(item.get('value', None))

    if not conf_name:
        raise ex.HadoopProvisionError(_("Config missing 'name'"))
    if conf_value is None:
        raise ex.PluginInvalidDataException(
            _("Config '%s' missing 'value'") % conf_name)

    if high_priority or item.get('priority', 2) == 1:
        priority = 1
    else:
        priority = 2

    return p.Config(
        name=conf_name,
        applicable_target=target,
        scope=scope,
        config_type=item.get('config_type', "string"),
        config_values=item.get('config_values', None),
        default_value=conf_value,
        is_optional=item.get('is_optional', True),
        description=item.get('description', None),
        priority=priority)
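# A minimal, standalone sketch of the priority resolution used above (no
# sahara imports; the item dicts and resolve_priority() are illustrative,
# not part of the plugin code).
def resolve_priority(item, high_priority=False):
    # priority 1 wins if forced by the caller or requested by the item;
    # everything else falls back to priority 2
    return 1 if high_priority or item.get('priority', 2) == 1 else 2

assert resolve_priority({'name': 'x', 'value': '1'}) == 2
assert resolve_priority({'name': 'x', 'value': '1', 'priority': 1}) == 1
assert resolve_priority({'name': 'x', 'value': '1'}, high_priority=True) == 1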
def init_xml_configs(xml_confs):
    configs = []
    for service, config_lists in six.iteritems(xml_confs):
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "node",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    if cfg.default_value in ["true", "false"]:
                        cfg.config_type = "bool"
                        cfg.default_value = (cfg.default_value == 'true')
                    elif utils.is_int(cfg.default_value):
                        cfg.config_type = "int"
                        cfg.default_value = int(cfg.default_value)
                    if config['name'] in CLUSTER_WIDE_CONFS:
                        cfg.scope = 'cluster'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)
    return configs
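# The string-to-typed-value coercion applied to XML defaults above, isolated
# so it can be exercised without sahara. A minimal sketch; coerce_default()
# is a hypothetical helper (the real code uses utils.is_int), not plugin code.
def coerce_default(value):
    if value in ["true", "false"]:
        return "bool", value == "true"
    try:
        return "int", int(value)
    except (TypeError, ValueError):
        return "string", value

assert coerce_default("true") == ("bool", True)
assert coerce_default("1024") == ("int", 1024)
assert coerce_default("org.apache.hadoop") == ("string", "org.apache.hadoop")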
def _initialize(self, config):
    for configuration in self.config['configurations']:
        for service_property in configuration['properties']:
            config = p.Config(
                service_property['name'],
                self._get_target(service_property['applicable_target']),
                service_property['scope'],
                config_type=service_property['config_type'],
                default_value=service_property['default_value'],
                is_optional=service_property['is_optional'],
                description=service_property['description'])
            setattr(config, 'tag', configuration['tag'].rsplit(".", 1)[0])
            self.config_items.append(config)
            # TODO(jspeidel): an assumption is made that property names
            # are unique across configuration sections which is dangerous
            property_name = service_property['name']
            # if property already exists, throw an exception
            if property_name in self.config_mapper:
                # internal error
                # ambari-config-resource contains duplicates
                raise exceptions.InvalidDataException(
                    'Internal Error. Duplicate property '
                    'name detected: %s' % property_name)
            self.config_mapper[service_property['name']] = \
                self._get_target(
                    service_property['applicable_target'])
def _initialise_configs():
    configs = []
    for service, config_lists in XML_CONFS.iteritems():
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "cluster",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    if config.get('type'):
                        cfg.config_type = CFG_TYPE[config['type']]
                    if cfg.config_type == 'bool':
                        cfg.default_value = cfg.default_value == 'true'
                    if cfg.config_type == 'int':
                        if cfg.default_value:
                            cfg.default_value = int(cfg.default_value)
                        else:
                            cfg.config_type = 'string'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)

    configs.append(IDH_TARBALL_URL)
    configs.append(IDH_REPO_URL)
    configs.append(OS_REPO_URL)
    configs.append(OOZIE_EXT22_URL)
    configs.append(ENABLE_SWIFT)
    return configs
def mapper(item):
    req = ['name', 'applicable_target', 'scope']
    opt = [
        'description', 'config_type', 'config_values', 'default_value',
        'is_optional', 'priority'
    ]
    kargs = dict((k, item[k]) for k in req + opt if k in item)
    return p.Config(**kargs)
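# A quick, standalone illustration of the keyword filtering performed by
# mapper(); the sample item is hypothetical and the final p.Config(**kargs)
# call is left out so the snippet runs without sahara.
req = ['name', 'applicable_target', 'scope']
opt = ['description', 'config_type', 'config_values', 'default_value',
       'is_optional', 'priority']
item = {'name': 'Enable Swift', 'applicable_target': 'general',
        'scope': 'cluster', 'default_value': True, 'unknown_key': 'dropped'}
kargs = dict((k, item[k]) for k in req + opt if k in item)
# unknown keys are silently discarded; only recognized Config kwargs remain
assert 'unknown_key' not in kargs and kargs['name'] == 'Enable Swift'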
def init_env_configs(env_confs):
    configs = []
    for service, config_items in six.iteritems(env_confs):
        for name, value in six.iteritems(config_items):
            configs.append(p.Config(name, service, "node",
                                    default_value=value, priority=1,
                                    config_type="int"))
    return configs
def _init_env_configs():
    configs = []
    for service, config_items in ENV_CONFS.iteritems():
        for name, value in config_items.iteritems():
            configs.append(p.Config(name, service, "node",
                                    default_value=value, priority=1,
                                    config_type="int"))
    return configs
def _init_configs(confs, app_target, scope):
    cfgs = []
    for cfg in confs:
        priority = 1 if cfg['name'] in priority_one_confs else 2
        c = p.Config(cfg['name'], app_target, scope, priority=priority,
                     default_value=_prepare_value(cfg['value']),
                     description=cfg['desc'], is_optional=True)
        cfgs.append(c)
    return cfgs
def get_version_config(self, versions):
    return p.Config(
        name='%s Version' % self._ui_name,
        applicable_target=self.ui_name,
        scope='cluster',
        config_type='dropdown',
        config_values=[(v, v) for v in sorted(versions, reverse=True)],
        is_optional=False,
        description=_('Specify the version of the service'),
        priority=1)
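# The dropdown entries are (label, value) pairs produced by a plain reverse
# string sort, so the highest version string is offered first. A standalone
# check with made-up version strings (not real service versions):
versions = ['5.1.0', '5.2.0', '4.0.2']
config_values = [(v, v) for v in sorted(versions, reverse=True)]
assert config_values[0] == ('5.2.0', '5.2.0')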
def _initialise_configs():
    configs = []
    for service, config_lists in XML_CONFS.iteritems():
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "node",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    if cfg.default_value in ["true", "false"]:
                        cfg.config_type = "bool"
                        cfg.default_value = (cfg.default_value == 'true')
                    elif types.is_int(cfg.default_value):
                        cfg.config_type = "int"
                        cfg.default_value = int(cfg.default_value)
                    if config['name'] in CLUSTER_WIDE_CONFS:
                        cfg.scope = 'cluster'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)

    for service, config_items in ENV_CONFS.iteritems():
        for name, param_format_str in config_items.iteritems():
            configs.append(
                p.Config(name, service, "node", default_value=1024,
                         priority=1, config_type="int"))

    configs.append(ENABLE_SWIFT)
    configs.append(ENABLE_MYSQL)
    configs.append(DECOMMISSIONING_TIMEOUT)
    if CONF.enable_data_locality:
        configs.append(ENABLE_DATA_LOCALITY)
    return configs
def _init_ng_configs(self, confs, app_target, scope):
    prepare_value = lambda x: x.replace('\n', ' ') if x else ""
    cfgs = []
    for cfg in confs:
        priority = 1 if cfg['name'] in self.priority_one_confs else 2
        c = p.Config(cfg['name'], app_target, scope, priority=priority,
                     default_value=prepare_value(cfg['value']),
                     description=cfg['desc'], is_optional=True)
        cfgs.append(c)
    return cfgs
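# prepare_value() flattens multi-line defaults coming from the plugin's JSON
# resource files and maps missing values to an empty string. A minimal
# standalone check of that behaviour:
prepare_value = lambda x: x.replace('\n', ' ') if x else ""
assert prepare_value('line one\nline two') == 'line one line two'
assert prepare_value(None) == ''
assert prepare_value('') == ''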
def _get_zookeeper_configs():
    zk_configs = []
    for service, config_items in six.iteritems(c_helper.ZOOKEEPER_CONFS):
        for item in config_items['OPTIONS']:
            cfg = p.Config(name=item["name"],
                           description=item["description"],
                           default_value=item["default"],
                           applicable_target=service,
                           scope="cluster",
                           is_optional=True,
                           priority=item["priority"])
            zk_configs.append(cfg)
    return zk_configs
def _get_spark_configs():
    spark_configs = []
    for service, config_items in six.iteritems(SPARK_CONFS):
        for item in config_items['OPTIONS']:
            cfg = p.Config(name=item["name"],
                           description=item["description"],
                           default_value=item["default"],
                           applicable_target=service,
                           scope="cluster",
                           is_optional=True,
                           priority=item["priority"])
            spark_configs.append(cfg)
    return spark_configs
def test_get_version_config_objects(self):
    actual = self.plugin_spec.get_version_config_objects()
    expected = [
        p.Config(name='service_2 Version',
                 applicable_target='service_2',
                 scope='cluster',
                 config_type='dropdown',
                 config_values=[('v1', 'v1'), ('v2', 'v2')],
                 is_optional=False,
                 priority=1)
    ]
    m_actual = map(lambda i: i.to_dict(), actual)
    m_expected = map(lambda i: i.to_dict(), expected)
    self.assertItemsEqual(m_expected, m_actual)
def load_configs(version):
    if OBJ_CONFIGS.get(version):
        return OBJ_CONFIGS[version]

    cfg_path = "plugins/ambari/resources/configs-%s.json" % version
    vanilla_cfg = jsonutils.loads(files.get_file_text(cfg_path))
    CONFIGS[version] = vanilla_cfg

    sahara_cfg = [hdp_repo_cfg, hdp_utils_repo_cfg, use_base_repos_cfg]
    for service, confs in vanilla_cfg.items():
        for k, v in confs.items():
            sahara_cfg.append(provisioning.Config(
                k, _get_service_name(service), _get_param_scope(k),
                default_value=v))

    sahara_cfg.extend(_get_ha_params())
    OBJ_CONFIGS[version] = sahara_cfg
    return sahara_cfg
def _initialize(self, config):
    for configuration in self.config['configurations']:
        for service_property in configuration['properties']:
            config = p.Config(
                service_property['name'],
                self._get_target(service_property['applicable_target']),
                service_property['scope'],
                config_type=service_property['config_type'],
                default_value=service_property['default_value'],
                is_optional=service_property['is_optional'],
                description=service_property['description'])
            setattr(config, 'tag', configuration['tag'].rsplit(".", 1)[0])
            self.config_items.append(config)
            # TODO(jspeidel): an assumption is made that property names
            # are unique across configuration sections which is dangerous
            property_name = service_property['name']
            # if property already exists, throw an exception
            if property_name in self.config_mapper:
                # internal error
                # ambari-config-resource contains duplicates
                raise exceptions.InvalidDataException(
                    _('Internal Error. Duplicate property '
                      'name detected: %s') % property_name)
            self.config_mapper[service_property['name']] = (
                self._get_target(
                    service_property['applicable_target']))

    host_reg_timeout = copy.copy(HOST_REGISTRATIONS_TIMEOUT)
    setattr(host_reg_timeout, 'tag', 'global')
    self.config_items.append(host_reg_timeout)
    self.config_mapper[host_reg_timeout.name] = 'global'

    if self.hadoop_version == '2.0.6':
        dec_timeout = copy.copy(DECOMMISSIONING_TIMEOUT)
        setattr(dec_timeout, 'tag', 'global')
        self.config_items.append(dec_timeout)
        self.config_mapper[dec_timeout.name] = 'global'
}, "HDFS": { 'Name Node Heap Size': 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"', 'Secondary Name Node Heap Size': 'HADOOP_SECONDARYNAMENODE_OPTS=' '\\"-Xmx%sm\\"', 'Data Node Heap Size': 'HADOOP_DATANODE_OPTS=\\"-Xmx%sm\\"' }, "JobFlow": { 'Oozie Heap Size': 'CATALINA_OPTS -Xmx%sm' } } ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster', config_type="bool", priority=1, default_value=True, is_optional=True) ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster', config_type="bool", priority=1, default_value=True, is_optional=True) ENABLE_MYSQL = p.Config('Enable MySQL', 'general', 'cluster',
class Hue(s.Service):
    THRIFT_VERSIONS = [5, 7]
    THRIFT_VERSION = p.Config(
        name="Thrift version",
        applicable_target="Hue",
        scope='cluster',
        config_type="dropdown",
        config_values=[(v, v) for v in THRIFT_VERSIONS],
        priority=1,
        description=_('Specifies thrift version.'))

    def __init__(self):
        super(Hue, self).__init__()
        self._name = 'hue'
        self._ui_name = 'Hue'
        self._node_processes = [HUE]
        self._ui_info = None
        self._validation_rules = [
            vu.exactly(1, HUE),
            vu.on_same_node(HUE, httpfs.HTTP_FS),
            vu.on_same_node(HUE_LIVY, spark.SPARK_SLAVE),
        ]
        self._priority = 2

    def get_ui_info(self, cluster_context):
        # Hue uses credentials of the administrative user (PAM auth)
        return [('HUE', HUE, {
            s.SERVICE_UI: 'http://%s:8888',
            'Username': '******',
            'Password': pu.get_mapr_password(cluster_context.cluster)
        })]

    def get_configs(self):
        return [Hue.THRIFT_VERSION]

    def conf_dir(self, cluster_context):
        return '%s/desktop/conf' % self.home_dir(cluster_context)

    def get_config_files(self, cluster_context, configs, instance=None):
        template = 'plugins/mapr/services/hue/resources/hue_%s.template'

        # hue.ini
        hue_ini = bcf.TemplateFile("hue.ini")
        hue_ini.remote_path = self.conf_dir(cluster_context)
        hue_ini.parse(files.get_file_text(template % self.version))
        hue_ini.add_properties(self._get_hue_ini_props(cluster_context))
        hue_ini.add_property("thrift_version",
                             configs[self.THRIFT_VERSION.name])

        # hue.sh
        hue_sh_template = 'plugins/mapr/services/hue/' \
                          'resources/hue_sh_%s.template'
        hue_sh = bcf.TemplateFile("hue.sh")
        hue_sh.remote_path = self.home_dir(cluster_context) + '/bin'
        hue_sh.parse(files.get_file_text(hue_sh_template % self.version))
        hue_sh.add_property('hadoop_version', cluster_context.hadoop_version)
        hue_sh.mode = 777

        hue_instances = cluster_context.get_instances(HUE)
        for instance in hue_instances:
            if instance not in cluster_context.changed_instances():
                cluster_context.should_be_restarted[self] += [instance]

        return [hue_ini, hue_sh]

    def _get_hue_ini_props(self, cluster_context):
        db_instance = mysql.MySQL.get_db_instance(cluster_context)
        is_yarn = cluster_context.cluster_mode == 'yarn'
        hue_specs = mysql.MySQL.HUE_SPECS
        rdbms_specs = mysql.MySQL.RDBMS_SPECS

        result = {
            'db_host': db_instance.internal_ip,
            'hue_name': hue_specs.db_name,
            'hue_user': hue_specs.user,
            'hue_password': hue_specs.password,
            'rdbms_name': rdbms_specs.db_name,
            'rdbms_user': rdbms_specs.user,
            'rdbms_password': rdbms_specs.password,
            'resource_manager_uri': cluster_context.resource_manager_uri,
            'yarn_mode': is_yarn,
            'rm_host': cluster_context.get_instance_ip(yarn.RESOURCE_MANAGER),
            'webhdfs_url': cluster_context.get_instance_ip(httpfs.HTTP_FS),
            'jt_host': cluster_context.get_instance_ip(mr.JOB_TRACKER),
            'oozie_host': cluster_context.get_instance_ip(oozie.OOZIE),
            'sqoop_host': cluster_context.get_instance_ip(
                sqoop.SQOOP_2_SERVER),
            'impala_host': cluster_context.get_instance_ip(
                impala.IMPALA_STATE_STORE),
            'zk_hosts_with_port':
                cluster_context.get_zookeeper_nodes_ip_with_port(),
            'secret_key': self._generate_secret_key()
        }

        hive_host = cluster_context.get_instance(hive.HIVE_SERVER_2)
        if hive_host:
            hive_service = cluster_context.get_service(hive.HIVE_SERVER_2)
            result.update({
                'hive_host': hive_host.internal_ip,
                'hive_version': hive_service.version,
                'hive_conf_dir': hive_service.conf_dir(cluster_context),
            })

        hbase_host = cluster_context.get_instance(hbase.HBASE_THRIFT)
        if hbase_host:
            hbase_service = cluster_context.get_service(hbase.HBASE_THRIFT)
            result.update({
                'hbase_host': hbase_host.internal_ip,
                'hbase_conf_dir': hbase_service.conf_dir(cluster_context),
            })

        livy_host = cluster_context.get_instance(HUE_LIVY)
        if livy_host:
            result.update({'livy_host': livy_host.internal_ip})

        sentry_host = cluster_context.get_instance(sentry.SENTRY)
        if sentry_host:
            ui_name = sentry.Sentry().ui_name
            sentry_version = cluster_context.get_chosen_service_version(
                ui_name)
            sentry_service = cluster_context._find_service_instance(
                ui_name, sentry_version)
            result.update({
                'sentry_host': sentry_host.internal_ip,
                'sentry_conf': sentry_service.conf_dir(cluster_context)
            })

        return result

    def post_install(self, cluster_context, instances):
        hue_instance = cluster_context.get_instance(HUE)

        @el.provision_event(name=_("Migrating Hue database"),
                            instance=hue_instance)
        def migrate_database(remote, cluster_context):
            hue_home = self.home_dir(cluster_context)
            cmd = '%(activate)s && %(syncdb)s && %(migrate)s'
            args = {
                'activate': 'source %s/build/env/bin/activate' % hue_home,
                'syncdb': '%s/build/env/bin/hue syncdb --noinput' % hue_home,
                'migrate': '%s/build/env/bin/hue migrate' % hue_home,
            }
            remote.execute_command(cmd % args, run_as_root=True, timeout=600)

        def hue_syncdb_workround(remote):
            cmd = 'printf "/opt/mapr/lib\n$JAVA_HOME/jre/lib/amd64/server\n"' \
                  ' | tee /etc/ld.so.conf.d/mapr-hue.conf && ldconfig'
            remote.execute_command(cmd, run_as_root=True)

        def centos7_workaround(remote):
            cmd = 'ln -s /lib64/libsasl2.so.3.0.0 /lib64/libsasl2.so.2' \
                  ' && rpm -ivh --nodeps http://yum.mariadb.org/5.5.49/' \
                  'rhel7-amd64/rpms/MariaDB-5.5.49-centos7-x86_64-compat.rpm'
            remote.execute_command(cmd, run_as_root=True,
                                   raise_when_error=False)

        with hue_instance.remote() as r:
            LOG.debug("Executing Hue database migration")
            # workaround for centos7
            if cluster_context.distro_version.split('.')[0] == '7':
                centos7_workaround(r)
            # temporary workaround to prevent failure of db migrate on mapr 5.2
            hue_syncdb_workround(r)
            migrate_database(r, cluster_context)
        self._copy_hive_configs(cluster_context, hue_instance)
        self._install_jt_plugin(cluster_context, hue_instance)
        self._set_service_dir_owner(cluster_context, instances)

    def _copy_hive_configs(self, cluster_context, hue_instance):
        hive_server = cluster_context.get_instance(hive.HIVE_SERVER_2)
        if not hive_server or hive_server == hue_instance:
            LOG.debug('No Hive Servers found. Skip')
            return
        hive_service = cluster_context.get_service(hive.HIVE_SERVER_2)
        hive_conf_dir = hive_service.conf_dir(cluster_context)
        g.copy(hive_conf_dir, hive_server, hive_conf_dir, hue_instance,
               'root')

    def update(self, cluster_context, instances=None):
        if self._should_restart(cluster_context, instances):
            self.restart(instances)

    def post_start(self, cluster_context, instances):
        self.update(cluster_context, instances=instances)

    def _should_restart(self, cluster_context, instances):
        app_services = [
            impala.Impala(),
            hive.Hive(),
            hbase.HBase(),
            sqoop.Sqoop2(),
            spark.SparkOnYarn(),
        ]
        instances = [
            cluster_context.filter_instances(instances, service=service)
            for service in app_services
        ]
        return bool(g.unique_list(itertools.chain(*instances)))

    def jt_plugin_path(self, cluster_context):
        path = ('%(home)s/desktop/libs/hadoop/java-lib'
                '/hue-plugins-%(version)s-mapr.jar')
        args = {
            'home': self.home_dir(cluster_context),
            'version': self.version,
        }
        return path % args

    @el.provision_event(name="Install Hue Job Tracker plugin",
                        instance_reference=2)
    def _install_jt_plugin(self, cluster_context, hue_instance):
        LOG.debug("Copying Hue JobTracker plugin")
        job_trackers = cluster_context.get_instances(mr.JOB_TRACKER)
        if not job_trackers:
            LOG.debug('No JobTrackers found. Skip')
            return
        jt_plugin_src = self.jt_plugin_path(cluster_context)
        jt_plugin_dest = cluster_context.hadoop_lib + '/jt_plugin.jar'
        for jt in job_trackers:
            g.copy(jt_plugin_src, hue_instance, jt_plugin_dest, jt, 'root')

    def _generate_secret_key(self, length=80):
        ascii_alphanum = string.ascii_letters + string.digits
        generator = random.SystemRandom()
        return ''.join(generator.choice(ascii_alphanum)
                       for _ in range(length))
def test_get_cluster_config_value(self):
    ctx = self._get_context()
    conf = p.Config('key', 'Service', 'cluster')
    self.assertEqual('value', ctx._get_cluster_config_value(conf))
    not_set = p.Config('nonset', 'Service', 'cluster')
    self.assertIsNone(ctx._get_cluster_config_value(not_set))
            'priority': 2,
        }]
    }
}

ENV_CONFS = {
    "HDFS": {
        'Name Node Heap Size': 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"',
        'Data Node Heap Size': 'HADOOP_DATANODE_OPTS=\\"-Xmx%sm\\"'
    }
}

ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster',
                                config_type="bool", priority=1,
                                default_value=True, is_optional=True)

HIDDEN_CONFS = [
    'fs.defaultFS',
    'dfs.namenode.name.dir',
    'dfs.datanode.data.dir'
]

CLUSTER_WIDE_CONFS = [
    'dfs.block.size', 'dfs.permissions', 'dfs.replication',
    'dfs.replication.min', 'dfs.replication.max', 'io.file.buffer.size'
]

PRIORITY_1_CONFS = [
    'dfs.datanode.du.reserved',
    'dfs.datanode.failed.volumes.tolerated',
class MapRFS(s.Service):
    _CREATE_DISK_LIST = 'plugins/mapr/resources/create_disk_list_file.sh'
    _DISK_SETUP_CMD = '/opt/mapr/server/disksetup -F /tmp/disk.list'
    _DISK_SETUP_TIMEOUT = 600

    ENABLE_MAPR_DB_NAME = 'Enable MapR-DB'
    HEAP_SIZE_PERCENT_NAME = 'MapR-FS heap size percent'

    ENABLE_MAPR_DB_CONFIG = p.Config(
        name=ENABLE_MAPR_DB_NAME,
        applicable_target='general',
        scope='cluster',
        config_type="bool",
        priority=1,
        default_value=True,
        description=_('Specifies that MapR-DB is in use.'))

    HEAP_SIZE_PERCENT = p.Config(
        name=HEAP_SIZE_PERCENT_NAME,
        applicable_target='MapRFS',
        scope='cluster',
        config_type="int",
        priority=1,
        default_value=8,
        description=_(
            'Specifies heap size for MapR-FS in percents of maximum value.'))

    def __init__(self):
        super(MapRFS, self).__init__()
        self._ui_name = 'MapRFS'
        self._node_processes = [CLDB, FILE_SERVER, NFS]
        self._ui_info = [
            ('Container Location Database (CLDB)', CLDB, {
                s.SERVICE_UI: 'http://%s:7221'
            }),
        ]
        self._validation_rules = [
            vu.at_least(1, CLDB),
            vu.each_node_has(FILE_SERVER),
            vu.on_same_node(CLDB, FILE_SERVER),
            vu.has_volumes(),
        ]

    def service_dir(self, cluster_context):
        return

    def home_dir(self, cluster_context):
        return

    def conf_dir(self, cluster_context):
        return '%s/conf' % cluster_context.mapr_home

    def post_install(self, cluster_context, instances):
        LOG.debug('Initializing MapR FS')
        instances = instances or cluster_context.get_instances()
        file_servers = cluster_context.filter_instances(instances,
                                                        FILE_SERVER)
        cpo.add_provisioning_step(cluster_context.cluster.id,
                                  _("Initializing MapR-FS"),
                                  len(file_servers))
        with context.ThreadGroup() as tg:
            for instance in file_servers:
                tg.spawn('init-mfs-%s' % instance.id,
                         self._init_mfs_instance, instance)
        LOG.info('MapR FS successfully initialized')

    @el.provision_event(instance_reference=1)
    def _init_mfs_instance(self, instance):
        self._generate_disk_list_file(instance, self._CREATE_DISK_LIST)
        self._execute_disksetup(instance)

    def _generate_disk_list_file(self, instance, path_to_disk_setup_script):
        LOG.debug('Creating disk list file')
        g.run_script(instance, path_to_disk_setup_script, 'root',
                     *instance.storage_paths())

    def _execute_disksetup(self, instance):
        with instance.remote() as rmt:
            rmt.execute_command(self._DISK_SETUP_CMD, run_as_root=True,
                                timeout=self._DISK_SETUP_TIMEOUT)

    def get_configs(self):
        return [MapRFS.ENABLE_MAPR_DB_CONFIG, MapRFS.HEAP_SIZE_PERCENT]

    def get_config_files(self, cluster_context, configs, instance=None):
        default_path = 'plugins/mapr/services/maprfs/resources/cldb.conf'
        cldb_conf = bcf.PropertiesFile("cldb.conf")
        cldb_conf.remote_path = self.conf_dir(cluster_context)
        if instance:
            cldb_conf.fetch(instance)
        cldb_conf.parse(files.get_file_text(default_path))
        cldb_conf.add_properties(self._get_cldb_conf_props(cluster_context))

        warden_conf = bcf.PropertiesFile("warden.conf")
        warden_conf.remote_path = "/opt/mapr/conf/"
        if instance:
            warden_conf.fetch(instance)
        warden_conf.add_properties({
            'service.command.mfs.heapsize.percent':
                configs[self.HEAP_SIZE_PERCENT_NAME]
        })

        return [cldb_conf, warden_conf]

    def _get_cldb_conf_props(self, cluster_context):
        zookeepers = cluster_context.get_zookeeper_nodes_ip_with_port()
        return {'cldb.zookeeper.servers': zookeepers}
class ConfigHelperV570(c_h.ConfigHelper):
    path_to_config = 'plugins/cdh/v5_7_0/resources/'

    CDH5_UBUNTU_REPO = (
        'deb [arch=amd64] http://archive.cloudera.com/cdh5'
        '/ubuntu/trusty/amd64/cdh trusty-cdh5.7.0 contrib'
        '\ndeb-src http://archive.cloudera.com/cdh5/ubuntu'
        '/trusty/amd64/cdh trusty-cdh5.7.0 contrib')

    DEFAULT_CDH5_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/cdh5/ubuntu'
        '/trusty/amd64/cdh/archive.key')

    CM5_UBUNTU_REPO = (
        'deb [arch=amd64] http://archive.cloudera.com/cm5'
        '/ubuntu/trusty/amd64/cm trusty-cm5.7.0 contrib'
        '\ndeb-src http://archive.cloudera.com/cm5/ubuntu'
        '/trusty/amd64/cm trusty-cm5.7.0 contrib')

    DEFAULT_CM5_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/cm5/ubuntu'
        '/trusty/amd64/cm/archive.key')

    CDH5_CENTOS_REPO = (
        '[cloudera-cdh5]'
        '\nname=Cloudera\'s Distribution for Hadoop, Version 5'
        '\nbaseurl=http://archive.cloudera.com/cdh5/redhat/6'
        '/x86_64/cdh/5.7.0/'
        '\ngpgkey = http://archive.cloudera.com/cdh5/redhat/6'
        '/x86_64/cdh/RPM-GPG-KEY-cloudera'
        '\ngpgcheck = 1')

    CM5_CENTOS_REPO = (
        '[cloudera-manager]'
        '\nname=Cloudera Manager'
        '\nbaseurl=http://archive.cloudera.com/cm5/redhat/6'
        '/x86_64/cm/5.7.0/'
        '\ngpgkey = http://archive.cloudera.com/cm5/redhat/6'
        '/x86_64/cm/RPM-GPG-KEY-cloudera'
        '\ngpgcheck = 1')

    KEY_TRUSTEE_UBUNTU_REPO_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/ubuntu/trusty/amd64/navigator-'
        'keytrustee/cloudera.list')

    DEFAULT_KEY_TRUSTEE_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/ubuntu/trusty/amd64/navigator-'
        'keytrustee/archive.key')

    KEY_TRUSTEE_CENTOS_REPO_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/redhat/6/x86_64/navigator-'
        'keytrustee/navigator-keytrustee5.repo')

    DEFAULT_SWIFT_LIB_URL = (
        'https://repository.cloudera.com/artifactory/repo/org'
        '/apache/hadoop/hadoop-openstack/2.6.0-cdh5.7.0'
        '/hadoop-openstack-2.6.0-cdh5.7.0.jar')

    SWIFT_LIB_URL = p.Config(
        'Hadoop OpenStack library URL', 'general', 'cluster', priority=1,
        default_value=DEFAULT_SWIFT_LIB_URL,
        description=("Library that adds Swift support to CDH. The file"
                     " will be downloaded by VMs."))

    HIVE_SERVER2_SENTRY_SAFETY_VALVE = f.get_file_text(
        path_to_config + 'hive-server2-sentry-safety.xml')

    HIVE_METASTORE_SENTRY_SAFETY_VALVE = f.get_file_text(
        path_to_config + 'hive-metastore-sentry-safety.xml')

    SENTRY_IMPALA_CLIENT_SAFETY_VALVE = f.get_file_text(
        path_to_config + 'sentry-impala-client-safety.xml')

    def __init__(self):
        super(ConfigHelperV570, self).__init__()
        self.priority_one_confs = self._load_json(
            self.path_to_config + 'priority-one-confs.json')
        self._init_all_ng_plugin_configs()
CM5_CENTOS_REPO = ('[cloudera-manager]'
                   '\nname=Cloudera Manager'
                   '\nbaseurl=http://archive.cloudera.com/cm5/redhat/6'
                   '/x86_64/cm/5.0.0/'
                   '\ngpgkey = http://archive.cloudera.com/cm5/redhat/6'
                   '/x86_64/cm/RPM-GPG-KEY-cloudera'
                   '\ngpgcheck = 1')

DEFAULT_SWIFT_LIB_URL = (
    'https://repository.cloudera.com/artifactory/repo/org'
    '/apache/hadoop/hadoop-openstack/2.3.0-cdh5.0.0'
    '/hadoop-openstack-2.3.0-cdh5.0.0.jar')

DEFAULT_EXTJS_LIB_URL = 'http://dev.sencha.com/deploy/ext-2.2.zip'

CDH5_REPO_URL = p.Config('CDH5 repo list URL', 'general', 'cluster',
                         priority=1, default_value="")

CDH5_REPO_KEY_URL = p.Config('CDH5 repo key URL (for debian-based only)',
                             'general', 'cluster', priority=1,
                             default_value="")

CM5_REPO_URL = p.Config('CM5 repo list URL', 'general', 'cluster',
                        priority=1, default_value="")
    '\n</property>'
    '\n<property>'
    '\n <name>sentry.service.client.server.rpc-address</name>'
    '\n <value>hostname</value>'
    '\n</property>'
    '\n<property>'
    '\n <name>sentry.service.client.server.rpc-connection-timeout</name>'
    '\n <value>200000</value>'
    '\n</property>'
    '\n<property>'
    '\n <name>sentry.service.security.mode</name>'
    '\n <value>none</value>'
    '\n</property>')

CDH5_REPO_URL = p.Config(
    'CDH5 repo list URL', 'general', 'cluster', priority=1,
    default_value="")

CDH5_REPO_KEY_URL = p.Config(
    'CDH5 repo key URL (for debian-based only)', 'general', 'cluster',
    priority=1, default_value="")

CM5_REPO_URL = p.Config(
    'CM5 repo list URL', 'general', 'cluster', priority=1,
    default_value="")

CM5_REPO_KEY_URL = p.Config(
    'CM5 repo key URL (for debian-based only)', 'general', 'cluster',
    priority=1, default_value="")

ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster',
class ConfigHelper(object):
    path_to_config = ''

    CDH5_REPO_URL = p.Config(
        'CDH5 repo list URL', 'general', 'cluster', priority=1,
        default_value="")

    CDH5_REPO_KEY_URL = p.Config(
        'CDH5 repo key URL (for debian-based only)', 'general', 'cluster',
        priority=1, default_value="")

    CM5_REPO_URL = p.Config(
        'CM5 repo list URL', 'general', 'cluster', priority=1,
        default_value="")

    CM5_REPO_KEY_URL = p.Config(
        'CM5 repo key URL (for debian-based only)', 'general', 'cluster',
        priority=1, default_value="")

    ENABLE_HBASE_COMMON_LIB = p.Config(
        'Enable HBase Common Lib', 'general', 'cluster', config_type='bool',
        priority=1, default_value=True)

    ENABLE_SWIFT = p.Config(
        'Enable Swift', 'general', 'cluster', config_type='bool',
        priority=1, default_value=True)

    DEFAULT_SWIFT_LIB_URL = (
        'https://repository.cloudera.com/artifactory/repo/org'
        '/apache/hadoop/hadoop-openstack/2.6.0-cdh5.5.0'
        '/hadoop-openstack-2.6.0-cdh5.5.0.jar')

    SWIFT_LIB_URL = p.Config(
        'Hadoop OpenStack library URL', 'general', 'cluster', priority=1,
        default_value=DEFAULT_SWIFT_LIB_URL,
        description=("Library that adds Swift support to CDH. The file"
                     " will be downloaded by VMs."))

    DEFAULT_EXTJS_LIB_URL = (
        'https://tarballs.openstack.org/sahara-extra/dist/common-artifacts/'
        'ext-2.2.zip')

    EXTJS_LIB_URL = p.Config(
        "ExtJS library URL", 'general', 'cluster', priority=1,
        default_value=DEFAULT_EXTJS_LIB_URL,
        description=("Ext 2.2 library is required for Oozie Web Console. "
                     "The file will be downloaded by VMs with oozie."))

    _default_executor_classpath = ":".join(
        ['/usr/lib/hadoop/lib/jackson-core-asl-1.8.8.jar',
         '/usr/lib/hadoop-mapreduce/hadoop-openstack.jar'])

    EXECUTOR_EXTRA_CLASSPATH = p.Config(
        'Executor extra classpath', 'Spark', 'cluster', priority=2,
        default_value=_default_executor_classpath,
        description='Value for spark.executor.extraClassPath in '
                    'spark-defaults.conf (default: %s)'
                    % _default_executor_classpath)

    KMS_REPO_URL = p.Config(
        'KMS repo list URL', 'general', 'cluster', priority=1,
        default_value="")

    KMS_REPO_KEY_URL = p.Config(
        'KMS repo key URL (for debian-based only)', 'general', 'cluster',
        priority=1, default_value="")

    REQUIRE_ANTI_AFFINITY = p.Config(
        'Require Anti Affinity', 'general', 'cluster', config_type='bool',
        priority=2, default_value=True)

    AWAIT_AGENTS_TIMEOUT = p.Config(
        'Await Cloudera agents timeout', 'general', 'cluster',
        config_type='int', priority=1, default_value=300, is_optional=True,
        description="Timeout for Cloudera agents connecting to"
                    " Cloudera Manager, in seconds")

    AWAIT_MANAGER_STARTING_TIMEOUT = p.Config(
        'Timeout for Cloudera Manager starting', 'general', 'cluster',
        config_type='int', priority=1, default_value=300, is_optional=True,
        description='Timeout for Cloudera Manager starting, in seconds')

    def __new__(cls):
        # make it a singleton
        if not hasattr(cls, '_instance'):
            cls._instance = super(ConfigHelper, cls).__new__(cls)
            setattr(cls, '__init__', cls.decorate_init(cls.__init__))
        return cls._instance

    @classmethod
    def decorate_init(cls, f):
        """decorate __init__ to prevent multiple calling."""
        def wrap(*args, **kwargs):
            if not hasattr(cls, '_init'):
                f(*args, **kwargs)
                cls._init = True
        return wrap

    def __init__(self):
        self.ng_plugin_configs = []
        self.priority_one_confs = {}

    def _load_json(self, path_to_file):
        data = f.get_file_text(path_to_file)
        return json.loads(data)

    def _init_ng_configs(self, confs, app_target, scope):
        prepare_value = lambda x: x.replace('\n', ' ') if x else ""
        cfgs = []
        for cfg in confs:
            priority = 1 if cfg['name'] in self.priority_one_confs else 2
            c = p.Config(cfg['name'], app_target, scope, priority=priority,
                         default_value=prepare_value(cfg['value']),
                         description=cfg['desc'], is_optional=True)
            cfgs.append(c)
        return cfgs

    def _init_all_ng_plugin_configs(self):
        self.hdfs_confs = self._load_and_init_configs(
            'hdfs-service.json', 'HDFS', 'cluster')
        self.namenode_confs = self._load_and_init_configs(
            'hdfs-namenode.json', 'NAMENODE', 'node')
        self.datanode_confs = self._load_and_init_configs(
            'hdfs-datanode.json', 'DATANODE', 'node')
        self.secnamenode_confs = self._load_and_init_configs(
            'hdfs-secondarynamenode.json', 'SECONDARYNAMENODE', 'node')
        self.hdfs_gateway_confs = self._load_and_init_configs(
            'hdfs-gateway.json', 'HDFS_GATEWAY', 'node')
        self.journalnode_confs = self._load_and_init_configs(
            'hdfs-journalnode.json', 'JOURNALNODE', 'node')

        self.yarn_confs = self._load_and_init_configs(
            'yarn-service.json', 'YARN', 'cluster')
        self.resourcemanager_confs = self._load_and_init_configs(
            'yarn-resourcemanager.json', 'RESOURCEMANAGER', 'node')
        self.nodemanager_confs = self._load_and_init_configs(
            'yarn-nodemanager.json', 'NODEMANAGER', 'node')
        self.jobhistory_confs = self._load_and_init_configs(
            'yarn-jobhistory.json', 'JOBHISTORY', 'node')
        self.yarn_gateway_conf = self._load_and_init_configs(
            'yarn-gateway.json', 'YARN_GATEWAY', 'node')

        self.oozie_service_confs = self._load_and_init_configs(
            'oozie-service.json', 'OOZIE', 'cluster')
        self.oozie_role_confs = self._load_and_init_configs(
            'oozie-oozie_server.json', 'OOZIE', 'node')

        self.hive_service_confs = self._load_and_init_configs(
            'hive-service.json', 'HIVE', 'cluster')
        self.hive_metastore_confs = self._load_and_init_configs(
            'hive-hivemetastore.json', 'HIVEMETASTORE', 'node')
        self.hive_hiveserver_confs = self._load_and_init_configs(
            'hive-hiveserver2.json', 'HIVESERVER', 'node')
        self.hive_webhcat_confs = self._load_and_init_configs(
            'hive-webhcat.json', 'WEBHCAT', 'node')

        self.hue_service_confs = self._load_and_init_configs(
            'hue-service.json', 'HUE', 'cluster')
        self.hue_role_confs = self._load_and_init_configs(
            'hue-hue_server.json', 'HUE', 'node')

        self.spark_service_confs = self._load_and_init_configs(
            'spark-service.json', 'SPARK_ON_YARN', 'cluster')
        self.spark_role_confs = self._load_and_init_configs(
            'spark-spark_yarn_history_server.json', 'SPARK_ON_YARN', 'node')

        self.zookeeper_server_confs = self._load_and_init_configs(
            'zookeeper-service.json', 'ZOOKEEPER', 'cluster')
        self.zookeeper_service_confs = self._load_and_init_configs(
            'zookeeper-server.json', 'ZOOKEEPER', 'node')

        self.hbase_confs = self._load_and_init_configs(
            'hbase-service.json', 'HBASE', 'cluster')
        self.master_confs = self._load_and_init_configs(
            'hbase-master.json', 'MASTER', 'node')
        self.regionserver_confs = self._load_and_init_configs(
            'hbase-regionserver.json', 'REGIONSERVER', 'node')

        self.flume_service_confs = self._load_and_init_configs(
            'flume-service.json', 'FLUME', 'cluster')
        self.flume_agent_confs = self._load_and_init_configs(
            'flume-agent.json', 'FLUME', 'node')

        self.sentry_service_confs = self._load_and_init_configs(
            'sentry-service.json', 'SENTRY', 'cluster')
        self.sentry_server_confs = self._load_and_init_configs(
            'sentry-sentry_server.json', 'SENTRY', 'node')

        self.solr_service_confs = self._load_and_init_configs(
            'solr-service.json', 'SOLR', 'cluster')
        self.solr_server_confs = self._load_and_init_configs(
            'solr-solr_server.json', 'SOLR', 'node')

        self.sqoop_service_confs = self._load_and_init_configs(
            'sqoop-service.json', 'SQOOP', 'cluster')
        self.sqoop_server_confs = self._load_and_init_configs(
            'sqoop-sqoop_server.json', 'SQOOP', 'node')

        self.ks_indexer_service_confs = self._load_and_init_configs(
            'ks_indexer-service.json', 'KS_INDEXER', 'cluster')
        self.ks_indexer_role_confs = self._load_and_init_configs(
            'ks_indexer-hbase_indexer.json', 'KS_INDEXER', 'node')

        self.impala_service_confs = self._load_and_init_configs(
            'impala-service.json', 'IMPALA', 'cluster')
        self.impala_catalogserver_confs = self._load_and_init_configs(
            'impala-catalogserver.json', 'CATALOGSERVER', 'node')
        self.impala_impalad_confs = self._load_and_init_configs(
            'impala-impalad.json', 'IMPALAD', 'node')
        self.impala_statestore_confs = self._load_and_init_configs(
            'impala-statestore.json', 'STATESTORE', 'node')

        self.kms_service_confs = self._load_and_init_configs(
            'kms-service.json', 'KMS', 'cluster')
        self.kms_kms_confs = self._load_and_init_configs(
            'kms-kms.json', 'KMS', 'node')

        self.kafka_service = self._load_and_init_configs(
            'kafka-service.json', 'KAFKA', 'cluster')
        self.kafka_kafka_broker = self._load_and_init_configs(
            'kafka-kafka_broker.json', 'KAFKA', 'node')
        self.kafka_kafka_mirror_maker = self._load_and_init_configs(
            'kafka-kafka_mirror_maker.json', 'KAFKA', 'node')

    def _load_and_init_configs(self, filename, app_target, scope):
        confs = self._load_json(self.path_to_config + filename)
        cfgs = self._init_ng_configs(confs, app_target, scope)
        self.ng_plugin_configs += cfgs
        return cfgs

    def _get_ng_plugin_configs(self):
        return self.ng_plugin_configs

    def _get_cluster_plugin_configs(self):
        return [self.CDH5_REPO_URL, self.CDH5_REPO_KEY_URL,
                self.CM5_REPO_URL, self.CM5_REPO_KEY_URL,
                self.ENABLE_SWIFT, self.SWIFT_LIB_URL,
                self.ENABLE_HBASE_COMMON_LIB, self.EXTJS_LIB_URL,
                self.AWAIT_MANAGER_STARTING_TIMEOUT,
                self.AWAIT_AGENTS_TIMEOUT,
                self.EXECUTOR_EXTRA_CLASSPATH, self.KMS_REPO_URL,
                self.KMS_REPO_KEY_URL, self.REQUIRE_ANTI_AFFINITY]

    def get_plugin_configs(self):
        cluster_wide = self._get_cluster_plugin_configs()
        ng_wide = self._get_ng_plugin_configs()
        return cluster_wide + ng_wide

    def _get_config_value(self, cluster, key):
        return cluster.cluster_configs.get(
            'general', {}).get(key.name, key.default_value)

    def get_cdh5_repo_url(self, cluster):
        return self._get_config_value(cluster, self.CDH5_REPO_URL)

    def get_cdh5_key_url(self, cluster):
        return self._get_config_value(cluster, self.CDH5_REPO_KEY_URL)

    def get_cm5_repo_url(self, cluster):
        return self._get_config_value(cluster, self.CM5_REPO_URL)

    def get_cm5_key_url(self, cluster):
        return self._get_config_value(cluster, self.CM5_REPO_KEY_URL)

    def is_swift_enabled(self, cluster):
        return self._get_config_value(cluster, self.ENABLE_SWIFT)

    def is_hbase_common_lib_enabled(self, cluster):
        return self._get_config_value(cluster, self.ENABLE_HBASE_COMMON_LIB)

    def get_swift_lib_url(self, cluster):
        return self._get_config_value(cluster, self.SWIFT_LIB_URL)

    def get_extjs_lib_url(self, cluster):
        return self._get_config_value(cluster, self.EXTJS_LIB_URL)

    def get_kms_key_url(self, cluster):
        return self._get_config_value(cluster, self.KMS_REPO_KEY_URL)

    def get_required_anti_affinity(self, cluster):
        return self._get_config_value(cluster, self.REQUIRE_ANTI_AFFINITY)
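# The __new__/decorate_init pair above makes ConfigHelper a singleton whose
# __init__ body runs only once, even though Python re-invokes __init__ on
# every constructor call. A minimal standalone sketch of that pattern
# (class and attribute names here are illustrative, not from sahara):
class Singleton(object):
    def __new__(cls):
        if not hasattr(cls, '_instance'):
            cls._instance = super(Singleton, cls).__new__(cls)
            cls.__init__ = cls.decorate_init(cls.__init__)
        return cls._instance

    @classmethod
    def decorate_init(cls, f):
        def wrap(*args, **kwargs):
            if not hasattr(cls, '_init'):
                f(*args, **kwargs)
                cls._init = True
        return wrap

    def __init__(self):
        # would be expensive work in the real helper; count calls instead
        self.calls = getattr(self, 'calls', 0) + 1

a, b = Singleton(), Singleton()
assert a is b and a.calls == 1  # one instance, __init__ body executed once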