def scale_cluster(cluster, instances):
    if not instances:
        return

    if not cmd.is_pre_installed_cdh(instances[0].remote()):
        _configure_os(instances)
        _install_packages(instances, PACKAGES)

    _start_cloudera_agents(instances)
    _await_agents(instances)
    for instance in instances:
        _configure_instance(instance)
        cu.update_configs(instance)

        if 'DATANODE' in instance.node_group.node_processes:
            cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)

        _configure_swift_to_inst(instance)

        if 'DATANODE' in instance.node_group.node_processes:
            hdfs = cu.get_service('DATANODE', instance=instance)
            cu.start_roles(hdfs, cu.get_role_name(instance, 'DATANODE'))

        if 'NODEMANAGER' in instance.node_group.node_processes:
            yarn = cu.get_service('NODEMANAGER', instance=instance)
            cu.start_roles(yarn, cu.get_role_name(instance, 'NODEMANAGER'))
def _add_role(instance, process):
    if process in ['MANAGER']:
        return

    service = cu.get_service(process, instance=instance)
    role = service.create_role(cu.get_role_name(instance, process),
                               process, instance.fqdn())
    role.update_config(_get_configs(process, node_group=instance.node_group))
def _add_role(instance, process):
    if process in ['CLOUDERA_MANAGER']:
        return

    process = pu.convert_role_showname(process)
    service = cu.get_service(process, instance=instance)
    role = service.create_role(cu.get_role_name(instance, process),
                               process, instance.fqdn())
    role.update_config(_get_configs(process, node_group=instance.node_group))
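# For context: _add_role is naturally driven from the per-instance
# configuration step that scale_cluster calls. A plausible sketch of that
# call site follows; the name _configure_instance comes from scale_cluster
# above, but the loop body here is an assumption, not the verified
# implementation.
def _configure_instance(instance):
    # Create one Cloudera Manager role per process assigned to the
    # instance's node group; _add_role itself skips the manager process.
    for process in instance.node_group.node_processes:
        _add_role(instance, process)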
def decommission_cluster(cluster, instances):
    dns = []
    nms = []
    for i in instances:
        if 'DATANODE' in i.node_group.node_processes:
            dns.append(cu.get_role_name(i, 'DATANODE'))
        if 'NODEMANAGER' in i.node_group.node_processes:
            nms.append(cu.get_role_name(i, 'NODEMANAGER'))

    if dns:
        cu.decommission_nodes(cluster, 'DATANODE', dns)
    if nms:
        cu.decommission_nodes(cluster, 'NODEMANAGER', nms)

    cu.delete_instances(cluster, instances)

    cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)
    cu.refresh_nodes(cluster, 'NODEMANAGER', cu.YARN_SERVICE_NAME)
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {}
    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
        core_site_safety_valve = ''
        if c_helper.is_swift_enabled(cluster):
            configs = swift_helper.get_swift_configs()
            confs = dict((c['name'], c['value']) for c in configs)
            core_site_safety_valve = xmlutils.create_elements_xml(confs)
        all_confs = {
            'HDFS': {
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                'dfs_block_local_path_access_user':
                    '******' if imp_count else '',
                'core_site_safety_valve': core_site_safety_valve
            },
            'HIVE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'OOZIE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'YARN': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service':
                    cu.SENTRY_SERVICE_NAME if snt_count else '',
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'SPARK_ON_YARN': {
                'yarn_service': cu.YARN_SERVICE_NAME
            },
            'HBASE': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
                'hbase_enable_indexing': 'true' if ks_count else 'false',
                'hbase_enable_replication': 'true' if ks_count else 'false'
            },
            'FLUME': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service':
                    cu.HBASE_SERVICE_NAME if hbm_count else ''
            },
            'SENTRY': {
                'hdfs_service': cu.HDFS_SERVICE_NAME
            },
            'SOLR': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
            },
            'SQOOP': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME
            },
            'IMPALA': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service':
                    cu.HBASE_SERVICE_NAME if hbm_count else '',
                'hive_service': cu.HIVE_SERVICE_NAME
            }
        }
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster)
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs': cu.get_role_name(pu.get_namenode(cluster),
                                                'NAMENODE')
            }
        }
        sentry_confs = {
            'SENTRY': {
                'sentry_server_database_type': 'postgresql',
                'sentry_server_database_host':
                    pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port': '7432',
                'sentry_server_database_password':
                    db_helper.get_sentry_db_password(cluster)
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, sentry_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    if node_group:
        paths = node_group.storage_paths()
        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': get_hadoop_dirs(paths,
                                                               '/yarn/local')
            },
            'SERVER': {
                'maxSessionTimeout': 60000
            }
        }

        ng_user_confs = pu.convert_process_configs(node_group.node_configs)
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    return all_confs.get(service, {})
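# The functions above lean on a _merge_dicts helper that is not shown in
# this section. A minimal sketch of the behavior they assume follows: a
# merge of {service: {key: value}} maps where later values win per key.
# Treat this as illustrative; the real helper may differ in detail.
def _merge_dicts(a, b):
    res = {}
    for cfg in (a, b):
        for service, confs in cfg.items():
            # Merge each service's config dict key-by-key; values from
            # `b` override those from `a` on conflict.
            res.setdefault(service, {}).update(confs)
    return res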
def test_get_role_name(self):
    inst_mock = mock.Mock()
    inst_mock.hostname.return_value = 'spam-host'
    self.assertEqual('eggs_spam_host',
                     cu.get_role_name(inst_mock, 'eggs'))
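# The test above pins down the naming contract: the process name becomes a
# prefix and dashes in the hostname turn into underscores. A minimal
# implementation consistent with that expectation (a sketch, not
# necessarily the real cu.get_role_name) would be:
def get_role_name(instance, service):
    # 'eggs' + 'spam-host' -> 'eggs_spam_host', as asserted in the test.
    return '%s_%s' % (service, instance.hostname().replace('-', '_'))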
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {
        'OOZIE': {
            'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
        },
        'YARN': {
            'hdfs_service': cu.HDFS_SERVICE_NAME
        },
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME
        },
        'SPARK_ON_YARN': {
            'yarn_service': cu.YARN_SERVICE_NAME
        }
    }

    if node_group:
        paths = node_group.storage_paths()
        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn')
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': get_hadoop_dirs(paths,
                                                               '/yarn/local')
            }
        }

        ng_user_confs = node_group.node_configs
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    if cluster:
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster),
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs': cu.get_role_name(pu.get_namenode(cluster),
                                                'NAMENODE')
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    return all_confs.get(service, {})
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {
        'OOZIE': {
            'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
        },
        'YARN': {
            'hdfs_service': cu.HDFS_SERVICE_NAME
        },
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME
        },
        'SPARK_ON_YARN': {
            'yarn_service': cu.YARN_SERVICE_NAME
        },
        'HBASE': {
            'hdfs_service': cu.HDFS_SERVICE_NAME,
            'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
        }
    }

    if node_group:
        paths = node_group.storage_paths()
        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn')
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': get_hadoop_dirs(paths,
                                                               '/yarn/local')
            }
        }

        ng_user_confs = pu.convert_process_configs(node_group.node_configs)
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    if cluster:
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster),
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs': cu.get_role_name(pu.get_namenode(cluster),
                                                'NAMENODE')
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    return all_confs.get(service, {})
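# In the same spirit as test_get_role_name above, a hedged sketch of a unit
# test for the node-group branch of _get_configs. The mocked attributes
# (storage_paths, node_configs) are the ones the function reads; the test
# name, the class wiring, and the assumption that
# pu.convert_process_configs({}) merges as a no-op are illustrative, not
# taken from the source.
def test_get_configs_datanode_dirs(self):
    ng_mock = mock.Mock()
    ng_mock.storage_paths.return_value = ['/volumes/disk1', '/volumes/disk2']
    ng_mock.node_configs = {}
    conf = _get_configs('DATANODE', node_group=ng_mock)
    # Each storage path gets the '/fs/dn' suffix, joined with commas.
    self.assertEqual('/volumes/disk1/fs/dn,/volumes/disk2/fs/dn',
                     conf['dfs_data_dir_list'])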