def setup_java_patch():
    """Apply the Ranger KMS Java patch and update the bundled hadoop-common jars.

    Nothing is done unless ``params.has_ranger_admin`` is truthy.  Otherwise
    ``db_setup.py -javapatch`` is executed as the KMS user, after which the
    ``KeyProviderFactory`` service descriptor is added (``jar -uf``) to every
    ``hadoop-common*.jar`` found in the KMS webapp ``lib`` directory and the
    jar's owner/group are reset to the KMS user and group.
    """
    import params

    if not params.has_ranger_admin:
        return

    java_patch_cmd = format('ambari-python-wrap {kms_home}/db_setup.py -javapatch')

    env_dict = {'RANGER_KMS_HOME': params.kms_home, 'JAVA_HOME': params.java_home}
    if params.db_flavor.lower() == 'sqla':
        # The 'sqla' flavor additionally receives LD_LIBRARY_PATH.
        env_dict['LD_LIBRARY_PATH'] = params.ld_library_path

    Execute(java_patch_cmd, environment=env_dict, logoutput=True, user=params.kms_user)

    kms_lib_path = format('{kms_home}/ews/webapp/lib/')
    hadoop_jar_files = [
        entry for entry in os.listdir(kms_lib_path)
        if entry.startswith('hadoop-common') and entry.endswith('.jar')
    ]

    # NOTE: the loop variable must stay named ``f``; the format strings below use it.
    for f in hadoop_jar_files:
        Execute(
            (
                format('{java_home}/bin/jar'),
                '-uf',
                format('{kms_home}/ews/webapp/lib/{f}'),
                format('{kms_home}/ews/webapp/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory'),
            ),
            user=params.kms_user,
        )
        File(format('{kms_home}/ews/webapp/lib/{f}'), owner=params.kms_user, group=params.kms_group)
def configure(self, env, upgrade_type=None, config_dir=None):
    """Write the Metron management UI configuration and validate Knox settings.

    Renders ``/etc/default/metron``, ``management_ui.yml`` and the UI
    ``app-config.json`` from their templates, ensures ``/var/run/metron``
    exists with the Metron owner/group, and finally raises ``Fail`` when Knox
    is enabled without LDAP.
    """
    from params import params
    env.set_params(params)

    metron_ownership = {'owner': params.metron_user, 'group': params.metron_group}

    File(format("/etc/default/metron"),
         content=Template("metron.j2"))

    File(format("{metron_config_path}/management_ui.yml"),
         mode=0o755,
         content=Template("management_ui.yml.j2"),
         **metron_ownership)

    File(format("{metron_management_ui_path}/assets/app-config.json"),
         content=Template("management-ui-app-config.json.j2"),
         **metron_ownership)

    Directory('/var/run/metron',
              create_parents=False,
              mode=0o755,
              **metron_ownership)

    knox_without_ldap = params.metron_knox_enabled and not params.metron_ldap_enabled
    if knox_without_ldap:
        raise Fail("Enabling Metron with Knox requires LDAP authentication. Please set 'LDAP Enabled' to true in the Metron Security tab.")
def load_template(self, env):
    """Replace the ``.kibana`` index in Elasticsearch with the bundled dashboard.

    The dashboard definition is read from ``dashboard/dashboard.p`` next to
    this script.  Raises ``IOError`` (ENOENT) when that file is missing; the
    existing ``.kibana`` index is deleted (400/404 responses ignored) before
    the definition is loaded and uploaded.
    """
    from dashboard.dashboardindex import DashboardIndex
    import params
    env.set_params(params)

    es_hostname = format("{es_host}")
    es_port_number = int(format("{es_port}"))
    Logger.info("Connecting to Elasticsearch on host: %s, port: %s" % (es_hostname, es_port_number))
    dashboard_index = DashboardIndex(host=es_hostname, port=es_port_number)

    # The Kibana dashboard definition lives on disk beside this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    template_path = os.path.join(script_dir, 'dashboard', 'dashboard.p')
    if not os.path.isfile(template_path):
        raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), template_path)

    Logger.info("Deleting .kibana index from Elasticsearch")
    dashboard_index.es.indices.delete(index='.kibana', ignore=[400, 404])

    Logger.info("Loading .kibana index from %s" % template_path)
    dashboard_definition = dashboard_index.load(filespec=template_path)
    dashboard_index.put(data=dashboard_definition)
def remove_solr_ssl_support():
    """Clear the cluster-properties node in ZooKeeper when running in cloud mode.

    Returns immediately when Solr is not in cloud mode, or when reading the
    node reports ``NoNodeException``.  The clear command is best-effort
    (``ignore_failures=True``).
    """
    import params

    if not params.solr_cloud_mode:
        return

    lookup_cmd = format('{zk_client_prefix} -cmd get {solr_cloud_zk_directory}{clusterprops_json}')
    code, output = call(lookup_cmd, env={'JAVA_HOME': params.java64_home}, timeout=60)

    # Nothing to clear when the node does not exist.
    if "NoNodeException" in output:
        return

    clear_cmd = format('{zk_client_prefix} -cmd clear {solr_cloud_zk_directory}{clusterprops_json}')
    Execute(clear_cmd,
            environment={'JAVA_HOME': params.java64_home},
            ignore_failures=True,
            user=params.solr_config_user)
def service_check(self, env):
    """Check the Solr PID file and optionally create the sample collection.

    Exits with status 1 when the PID file is missing.  The sample collection
    is created only when enabled in params and not already present; a core is
    created in standalone mode and a collection in cloud mode.
    """
    import params
    env.set_params(params)

    if not os.path.isfile(params.solr_config_pid_file):
        Logger.error(format("PID file {solr_config_pid_file} does not exist"))
        exit(1)

    if not params.solr_collection_sample_create:
        Logger.info("Create sample collection unchecked, skipping ...")
        return

    if exists_collection(params.solr_collection_name):
        Logger.warning(format("Collection {solr_collection_name} already exists, skipping ..."))
        return

    if params.solr_cloud_mode:
        create_cmd = format(
            '{solr_config_bin_dir}/solr create_collection -c {solr_collection_name}'
            ' -d {solr_collection_config_dir} -p {solr_config_port}'
            ' -s {solr_collection_shards} -rf {solr_collection_replicas}'
            ' >> {solr_config_service_log_file} 2>&1'
        )
    else:
        create_cmd = format(
            '{solr_config_bin_dir}/solr create_core -c {solr_collection_name}'
            ' -d {solr_collection_config_dir} -p {solr_config_port} >> {solr_config_service_log_file} 2>&1'
        )

    Execute(create_cmd,
            environment={'JAVA_HOME': params.java64_home},
            user=params.solr_config_user)
def bootstrap_standby_namenode(params, use_path=False):
    """Run ``hdfs namenode -bootstrapStandby``, retrying up to 50 times.

    :param params: object providing ``hadoop_bin_dir``, ``command_phase`` and ``hdfs_user``
    :param use_path: when True, prefix the ``hdfs`` binary with ``params.hadoop_bin_dir``
    :return: True when the command exits with 0 (bootstrapped) or 5 (already
             bootstrapped); False when all attempts fail or an exception is raised
    """
    if use_path:
        bin_path = os.path.join(params.hadoop_bin_dir, '')
    else:
        bin_path = ""

    try:
        max_attempts = 50
        # Blue print based deployments start both NN in parallel and occasionally
        # the first attempt to bootstrap may fail. Depending on how it fails the
        # second attempt may not succeed (e.g. it may find the folder and decide that
        # bootstrap succeeded). The solution is to call with -force option but only
        # during initial start
        if params.command_phase == "INITIAL_START":
            bootstrap_cmd = format("{bin_path}hdfs namenode -bootstrapStandby -nonInteractive -force")
        else:
            bootstrap_cmd = format("{bin_path}hdfs namenode -bootstrapStandby -nonInteractive")

        Logger.info("Boostrapping standby namenode: %s" % (bootstrap_cmd))
        for attempt in range(1, max_attempts + 1):
            Logger.info('Try %d out of %d' % (attempt, max_attempts))
            code, out = shell.call(bootstrap_cmd, logoutput=False, user=params.hdfs_user)
            if code == 0:
                Logger.info("Standby namenode bootstrapped successfully")
                return True
            if code == 5:
                Logger.info("Standby namenode already bootstrapped")
                return True
            Logger.warning('Bootstrap standby namenode failed with %d error code. Will retry' % (code))
    except Exception as ex:
        Logger.error('Bootstrap standby namenode threw an exception. Reason %s' % (str(ex)))

    return False
def setup_solr_metrics_support():
    """Create the Solr metrics directories and write their configuration files.

    Creates the conf, pid and log directories, renders
    ``solr.metrics.properties`` and ``log4j2.xml`` into the conf directory,
    and writes the JAAS configuration when security is enabled.
    """
    import params

    metrics_dirs = [
        params.solr_metrics_config_conf_dir,
        params.solr_metrics_config_pid_dir,
        params.solr_metrics_config_log_dir,
    ]
    Directory(metrics_dirs,
              mode=0o755,
              cd_access='a',
              owner=params.solr_config_user,
              group=params.solr_config_group,
              create_parents=True)

    File(format("{solr_metrics_config_conf_dir}/solr.metrics.properties"),
         content=InlineTemplate(params.solr_metrics_properties),
         owner=params.solr_config_user)

    File(format("{solr_metrics_config_conf_dir}/log4j2.xml"),
         content=Template("log4j2.xml"),
         owner=params.solr_config_user)

    if not params.security_enabled:
        return

    File(format("{solr_metrics_kerberos_jaas_config}"),
         content=Template("solr_metrics_jaas.conf.j2"),
         owner=params.solr_config_user)
def start_rest_application(self):
    """
    Start the REST application.

    Performs a kinit first when security is enabled.  The start script is
    skipped (``not_if``) when the PID file exists and the recorded process
    is still alive.
    """
    Logger.info('Starting REST application')

    settings = self.__params
    if settings.security_enabled:
        kinit(settings.kinit_path_local,
              settings.metron_keytab_path,
              settings.metron_principal_name,
              execute_user=settings.metron_user)

    # Get the PID associated with the service.
    # NOTE: ``pid_file`` and ``pid`` are referenced by name in the format strings below.
    pid_file = format("{metron_rest_pid_dir}/{metron_rest_pid}")
    pid_lookup = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"), user=settings.metron_user, is_checked_call=False)
    pid = pid_lookup[1]
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    # Set the password with env variable instead of param to avoid it showing in ps
    cmd = format((
        "export METRON_JDBC_PASSWORD={metron_jdbc_password!p};"
        "export JAVA_HOME={java_home};"
        "export METRON_REST_CLASSPATH={metron_rest_classpath};"
        "export METRON_INDEX_CP={metron_indexing_classpath};"
        "export METRON_LOG_DIR={metron_log_dir};"
        "export METRON_PID_FILE={pid_file};"
        "{metron_home}/bin/metron-rest.sh;"
        "unset METRON_JDBC_PASSWORD;"
    ))

    Execute(cmd,
            user=settings.metron_user,
            logoutput=True,
            not_if=process_id_exists_command,
            timeout=60)

    Logger.info('Done starting REST application')
def setup_solr_cloud():
    """Create the Solr ZooKeeper directory unless the cluster-state node exists.

    The cluster-state node is read first; the ``makepath`` command only runs
    when that read reports ``NoNodeException``, and its failures are ignored.
    """
    import params

    probe_cmd = format('{zk_client_prefix} -cmd get {solr_cloud_zk_directory}{clusterstate_json}')
    code, output = call(probe_cmd, env={'JAVA_HOME': params.java64_home}, timeout=60)

    if "NoNodeException" not in output:
        Logger.info(format("ZK node {solr_cloud_zk_directory}{clusterstate_json} already exists, skipping ..."))
        return

    makepath_cmd = format('{zk_client_prefix} -cmd makepath {solr_cloud_zk_directory}')
    Execute(makepath_cmd,
            environment={'JAVA_HOME': params.java64_home},
            ignore_failures=True,
            user=params.solr_config_user)
def action_create(self):
    """Write the ``.list`` repository file and refresh the package index.

    The desired content (optionally appended to the existing file) is staged
    in a temporary file.  Only when the repository file is missing or differs
    from the staged content is it rewritten, the update command run, and the
    add-key command executed for each missing public key reported in the
    update output.
    """
    with Environment.get_instance_copy() as env:
        with tempfile.NamedTemporaryFile() as tmpf:
            # NOTE: local names are kept as-is; the update/add-key command
            # templates are formatted against them.
            repo_file_name = format("{repo_file_name}.list", repo_file_name=self.resource.repo_file_name)
            repo_file_path = format("{repo_dir}/{repo_file_name}", repo_dir=self.repo_dir)

            new_content = Template(
                self.resource.repo_template,
                package_type=self.package_type,
                base_url=self.resource.base_url,
                components=" ".join(self.resource.components),
            ).get_content()

            old_content = ""
            if self.resource.append_to_file and os.path.isfile(repo_file_path):
                with open(repo_file_path) as repo_file:
                    old_content = repo_file.read() + "\n"

            File(tmpf.name, content=old_content + new_content)

            # Nothing to do when the repository file already has the staged content.
            if os.path.isfile(repo_file_path) and filecmp.cmp(tmpf.name, repo_file_path):
                return

            File(repo_file_path, content=StaticFile(tmpf.name))

            update_cmd_formatted = [format(x) for x in self.update_cmd]
            # this is time expensive
            retcode, out = checked_call(update_cmd_formatted, sudo=True)

            # add public keys for new repos
            missing_pkeys = set(re.findall(self.missing_pkey_regex, out))
            for pkey in missing_pkeys:
                Execute(
                    format(self.add_pkey_cmd),
                    # in case we are on the host w/o internet (using localrepo),
                    # we should ignore hanging
                    timeout=15,
                    ignore_failures=True,
                )
def decommission():
    """Rewrite the HDFS exclude file and ask the NameNode to refresh its nodes.

    The exclude file is always rendered.  Unless
    ``params.update_exclude_file_only`` is set, a kinit is performed and
    ``dfsadmin -refreshNodes`` is executed against the NameNode.
    """
    import params

    hdfs_user = params.hdfs_user
    conf_dir = params.hadoop_conf_dir
    user_group = params.user_group
    nn_kinit_cmd = params.nn_kinit_cmd

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=hdfs_user,
         group=user_group)

    if params.update_exclude_file_only:
        return

    Execute(nn_kinit_cmd, user=hdfs_user)

    if params.dfs_ha_enabled:
        # due to a bug in hdfs, refreshNodes will not run on both namenodes so we
        # need to execute each command scoped to a particular namenode
        nn_refresh_cmd = format('dfsadmin -fs hdfs://{namenode_rpc} -refreshNodes')
    else:
        nn_refresh_cmd = format('dfsadmin -fs {namenode_address} -refreshNodes')

    ExecuteHadoop(nn_refresh_cmd,
                  user=hdfs_user,
                  conf_dir=conf_dir,
                  kinit_override=True,
                  bin_dir=params.hadoop_bin_dir)
def prepare_rolling_upgrade():
    """
    Prepare the HDFS NameNode for a rolling upgrade or downgrade.

    For an upgrade the following is done (the NameNode must already be up):
    1. Leave safemode if the safemode status is not OFF
    2. Execute a rolling upgrade "prepare"
    3. Execute a rolling upgrade "query"
    A downgrade performs no additional step here.

    Raises ``Fail`` when the upgrade direction is missing/unknown or when
    safemode cannot be left.
    """
    import params

    known_directions = [Direction.UPGRADE, Direction.DOWNGRADE]
    if not params.upgrade_direction or params.upgrade_direction not in known_directions:
        raise Fail("Could not retrieve upgrade direction: %s" % str(params.upgrade_direction))
    Logger.info(format("Performing a(n) {params.upgrade_direction} of HDFS"))

    if params.security_enabled:
        Execute(format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}"))

    # Only the upgrade direction needs work; a downgrade is a no-op.
    if params.upgrade_direction == Direction.UPGRADE:
        transitioned, _previous_state = reach_safemode_state(params.hdfs_user, SafeMode.OFF, True)
        if not transitioned:
            raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(SafeMode.OFF))

        for rolling_cmd in ("hdfs dfsadmin -rollingUpgrade prepare",
                            "hdfs dfsadmin -rollingUpgrade query"):
            Execute(rolling_cmd, user=params.hdfs_user, logoutput=True)
def post_upgrade_restart(self, env, upgrade_type=None):
    """Verify the ZooKeeper quorum after a rolling-upgrade restart.

    Skipped entirely for ``upgrade_type == "nonrolling"``.  Otherwise a
    uniquely named znode is created on a randomly chosen server, listed and
    deleted again; failure of the create or list step is reported with a
    quorum error message by ``call_and_match_output``.  When a client port is
    configured, the local server's ``Mode`` line is logged.

    :param env: execution environment; receives ``params`` via ``set_params``
    :param upgrade_type: upgrade kind; ``"nonrolling"`` disables the check
    """
    if upgrade_type == "nonrolling":
        return

    Logger.info("Executing Stack Upgrade post-restart")
    import params
    env.set_params(params)

    # NOTE: zk_server_host, cli_shell and unique are referenced by name in the
    # format strings below and must keep these names.
    zk_server_host = random.choice(params.zookeeper_hosts)
    cli_shell = format("{zk_cli_shell} -server {zk_server_host}:{client_port}")

    # Ensure that a quorum is still formed.
    unique = get_unique_id_and_date()
    create_command = format("echo 'create /{unique} mydata' | {cli_shell}")
    list_command = format("echo 'ls /' | {cli_shell}")
    delete_command = format("echo 'delete /{unique} ' | {cli_shell}")

    quorum_err_message = "Failed to establish zookeeper quorum"
    call_and_match_output(create_command, 'Created', quorum_err_message, user=params.zk_user)
    # Both halves of the pattern are raw strings: "\]" in a non-raw literal is
    # an invalid escape sequence (same value, but warns on modern Python).
    listing_pattern = r"\[.*?" + unique + r".*?\]"
    call_and_match_output(list_command, listing_pattern, quorum_err_message, user=params.zk_user)
    shell.call(delete_command, user=params.zk_user)

    if params.client_port:
        check_leader_command = format("echo stat | nc localhost {client_port} | grep Mode")
        code, out = shell.call(check_leader_command, logoutput=False)
        if code == 0 and out:
            Logger.info(out)
def setup_ranger_plugin_keystore(service_name, audit_db_is_enabled, stack_version, credential_file, xa_audit_db_password, ssl_truststore_password, ssl_keystore_password, component_user, component_group, java_home):
    """Store the Ranger plugin passwords in the credential file.

    Runs ``ranger_credential_helper.py`` (with sudo) once per credential:
    ``auditDBCred`` only when ``audit_db_is_enabled``, then ``sslKeyStore``
    and ``sslTrustStore``.  Afterwards the credential file is given to
    ``component_user``/``component_group`` with mode 0640.  The ``nifi``
    service uses a different helper location than the other services.
    """
    stack_root = Script.get_stack_root()
    service_name = str(service_name).lower()

    if service_name == 'nifi':
        cred_lib_path = format('{stack_root}/{stack_version}/{service_name}/ext/ranger/install/lib/*')
        cred_setup_prefix = (format('{stack_root}/{stack_version}/{service_name}/ext/ranger/scripts/ranger_credential_helper.py'), '-l', cred_lib_path)
    else:
        cred_lib_path = format('{stack_root}/{stack_version}/ranger-{service_name}-plugin/install/lib/*')
        cred_setup_prefix = (format('{stack_root}/{stack_version}/ranger-{service_name}-plugin/ranger_credential_helper.py'), '-l', cred_lib_path)

    # (alias, password) pairs in the order they are written.
    credentials = []
    if audit_db_is_enabled:
        credentials.append(('auditDBCred', xa_audit_db_password))
    credentials.append(('sslKeyStore', ssl_keystore_password))
    credentials.append(('sslTrustStore', ssl_truststore_password))

    for alias, secret in credentials:
        cred_setup = cred_setup_prefix + ('-f', credential_file, '-k', alias, '-v', PasswordString(secret), '-c', '1')
        Execute(cred_setup, environment={'JAVA_HOME': java_home}, logoutput=True, sudo=True)

    File(credential_file,
         owner=component_user,
         group=component_group,
         mode=0o640)
def kill_zkfc(zkfc_user):
    """
    Kill the ZKFC process so that the NameNode fails over.

    There are two potential methods for failing over the namenode, especially during a Rolling Upgrade.
    Option 1. Kill zkfc on primary namenode provided that the secondary is up and has zkfc running on it.
    Option 2. Silent failover (not supported as of HDP 2.2.0.0)

    :param zkfc_user: User that started the ZKFC process.
    :return: Return True if ZKFC was killed, otherwise, false.
    """
    import params

    if not params.dfs_ha_enabled:
        return False

    # NOTE: ``zkfc_pid_file`` is referenced by name in the format strings below.
    zkfc_pid_file = get_service_pid_file("zkfc", zkfc_user)
    if not zkfc_pid_file:
        return False

    check_process = as_user(
        format("ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"),
        user=zkfc_user)
    code, out = shell.call(check_process)
    if code != 0:
        return False

    Logger.debug("ZKFC is running and will be killed.")
    kill_command = format("kill -15 `cat {zkfc_pid_file}`")
    Execute(kill_command, user=zkfc_user)
    File(zkfc_pid_file, action="delete")
    return True
def is_active_namenode(hdfs_binary):
    """
    Check whether the current NameNode is the active one.

    Without HA the current NameNode is always considered active.  With HA,
    both NameNodes are polled up to 5 times with 6 seconds between rounds.

    :param hdfs_binary: hdfs executable used to run ``haadmin``
    :return: True if current NameNode is active, False if the other NameNode
             is active or no active NameNode was found
    """
    import params

    if not params.dfs_ha_enabled:
        return True

    path_env = {'PATH': params.hadoop_bin_dir}
    is_active_this_namenode_cmd = as_user(
        format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"),
        params.hdfs_user, env=path_env)
    is_active_other_namenode_cmd = as_user(
        format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {other_namenode_id} | grep active"),
        params.hdfs_user, env={'PATH': params.hadoop_bin_dir})

    rounds = 5
    for attempt in range(rounds):
        # grep exits with 0 when this NameNode reports "active"
        code, out = shell.call(is_active_this_namenode_cmd)
        if code == 0:
            return True

        # grep exits with 0 when the other NameNode reports "active"
        code, out = shell.call(is_active_other_namenode_cmd)
        if code == 0:
            return False

        # Do not sleep after last iteration
        if attempt < rounds - 1:
            time.sleep(6)

    Logger.info("Active NameNode is not found.")
    return False
def setup_hdp_install_directory():
    """Run ``hdp-select set all`` once for stacks at version 2.2 or newer.

    A marker file is touched in the same command; its presence prevents the
    command from running again.  The command also requires a matching
    ``/usr/hdp/<version>*`` directory to exist.
    """
    # This is a name of marker file.
    # NOTE: referenced by name in the ``not_if`` format string below.
    SELECT_ALL_PERFORMED_MARKER = "/var/lib/ambari-agent/data/hdp-select-set-all.performed"
    import params

    if params.hdp_stack_version == "" or compare_versions(params.hdp_stack_version, '2.2') < 0:
        return

    touch_marker = as_sudo(['touch', SELECT_ALL_PERFORMED_MARKER])
    select_all = format('{sudo} /usr/bin/hdp-select set all `ambari-python-wrap /usr/bin/hdp-select versions | grep ^{stack_version_unformatted} | tail -1`')
    Execute(touch_marker + ' ; ' + select_all,
            # If any HDP version is installed
            only_if=format('ls -d /usr/hdp/{stack_version_unformatted}*'),
            # Do that only once (otherwise we break rolling upgrade logic)
            not_if=format("test -f {SELECT_ALL_PERFORMED_MARKER}"))
def setup_ranger_plugin_jar_symblink(hdp_version, service_name, component_list):
    """Symlink every Ranger plugin jar into each component's ``lib`` directory.

    For each jar in ``/usr/hdp/<hdp_version>/ranger-<service_name>-plugin/lib``
    and each component, ``ln -sf`` is run with sudo unless the target already
    exists (``not_if``) and only while the source jar exists (``only_if``).
    """
    plugin_lib_listing = os.listdir(format('/usr/hdp/{hdp_version}/ranger-{service_name}-plugin/lib'))

    # NOTE: ``jar_file`` and ``component`` are referenced by name in the format strings.
    for jar_file in plugin_lib_listing:
        source_jar = format('/usr/hdp/{hdp_version}/ranger-{service_name}-plugin/lib/{jar_file}')
        for component in component_list:
            target_link = format('/usr/hdp/current/{component}/lib/{jar_file}')
            Execute(('ln', '-sf', source_jar, target_link),
                    not_if=format('ls /usr/hdp/current/{component}/lib/{jar_file}'),
                    only_if=format('ls /usr/hdp/{hdp_version}/ranger-{service_name}-plugin/lib/{jar_file}'),
                    sudo=True)
def setup_ranger_plugin(component_select_name, service_name, downloaded_custom_connector, driver_curl_source, driver_curl_target, java_home, repo_name, plugin_repo_dict, ranger_env_properties, plugin_properties, policy_user, policymgr_mgr_url, plugin_enabled, component_user, component_group, api_version=None, skip_if_rangeradmin_down = True, **kwargs):
    """Install the JDBC driver, update install.properties and toggle the plugin.

    Steps, in order:
    1. download the custom connector and copy it to ``driver_curl_target``;
    2. apply ``plugin_properties`` plus CUSTOM_USER/CUSTOM_GROUP to the
       plugin's ``install.properties`` (``Fail`` is raised if it is missing);
    3. when ``plugin_enabled``, create the Ranger repository through the v2
       admin client if ``api_version == 'v2'``, else through the v1 client;
    4. run the ``enable-…``/``disable-…`` plugin script with sudo.
    """
    File(downloaded_custom_connector,
         content=DownloadSource(driver_curl_source),
         mode=0o644)

    Execute(('cp', '--remove-destination', downloaded_custom_connector, driver_curl_target),
            path=["/bin", "/usr/bin/"],
            sudo=True)

    File(driver_curl_target, mode=0o644)

    # NOTE: ``hdp_version`` and ``file_path`` are referenced by name in format strings.
    hdp_version = get_hdp_version(component_select_name)
    file_path = format('/usr/hdp/{hdp_version}/ranger-{service_name}-plugin/install.properties')

    if not os.path.isfile(file_path):
        raise Fail(format('Ranger {service_name} plugin install.properties file does not exist at {file_path}'))

    ModifyPropertiesFile(file_path, properties=plugin_properties)

    custom_plugin_properties = {
        'CUSTOM_USER': component_user,
        'CUSTOM_GROUP': component_group,
    }
    ModifyPropertiesFile(file_path, properties=custom_plugin_properties)

    if not plugin_enabled:
        cmd = (format('disable-{service_name}-plugin.sh'),)
    else:
        cmd = (format('enable-{service_name}-plugin.sh'),)

        admin_client_cls = RangeradminV2 if api_version == 'v2' else Rangeradmin
        ranger_adm_obj = admin_client_cls(url=policymgr_mgr_url, skip_if_rangeradmin_down=skip_if_rangeradmin_down)

        ranger_adm_obj.create_ranger_repository(
            service_name, repo_name, plugin_repo_dict,
            ranger_env_properties['ranger_admin_username'],
            ranger_env_properties['ranger_admin_password'],
            ranger_env_properties['admin_username'],
            ranger_env_properties['admin_password'],
            policy_user)

    # The script is looked up through PATH, which is set to the plugin directory.
    cmd_env = {
        'JAVA_HOME': java_home,
        'PWD': format('/usr/hdp/{hdp_version}/ranger-{service_name}-plugin'),
        'PATH': format('/usr/hdp/{hdp_version}/ranger-{service_name}-plugin'),
    }

    Execute(cmd,
            environment=cmd_env,
            logoutput=True,
            sudo=True)
def setup_ranger_plugin_jar_symblink(stack_version, service_name, component_list):
    """Symlink every Ranger plugin jar into each component's ``lib`` directory.

    The plugin directory is resolved under the stack root.  For each jar and
    each component, ``ln -sf`` is run with sudo unless the target already
    exists (``not_if``) and only while the source jar exists (``only_if``).
    """
    # NOTE: ``stack_root``, ``jar_file`` and ``component`` are referenced by
    # name in the format strings.
    stack_root = Script.get_stack_root()
    plugin_lib_listing = os.listdir(format('{stack_root}/{stack_version}/ranger-{service_name}-plugin/lib'))

    for jar_file in plugin_lib_listing:
        source_jar = format('{stack_root}/{stack_version}/ranger-{service_name}-plugin/lib/{jar_file}')
        for component in component_list:
            target_link = format('{stack_root}/current/{component}/lib/{jar_file}')
            Execute(('ln', '-sf', source_jar, target_link),
                    not_if=format('ls {stack_root}/current/{component}/lib/{jar_file}'),
                    only_if=format('ls {stack_root}/{stack_version}/ranger-{service_name}-plugin/lib/{jar_file}'),
                    sudo=True)
def solr_port_validation():
    """Report whether the configured Solr port is free.

    Runs ``netstat -lnt`` filtered for listening sockets on the configured
    port and logs the output.

    :return: False (after logging an error) when the output contains
             ``LISTEN``; True otherwise
    """
    netstat_cmd = format('netstat -lnt | awk -v v1={solr_config_port} \'$6 == "LISTEN" && $4 ~ ":"v1\'')
    # NOTE: ``output`` is referenced by name in the log format string.
    code, output = call(netstat_cmd, timeout=60)
    Logger.info(format("Solr port validation output: {output}"))

    port_is_taken = "LISTEN" in output
    if port_is_taken:
        Logger.error(format("The port {solr_config_port} is not available"))
    return not port_is_taken
def get_service_pid_file(name, user):
    """
    Build the pid file path used when ``user`` started the service ``name``.

    The path has the form ``<hadoop_pid_dir_prefix>/<user>/hadoop-<user>-<name>.pid``.

    :param name: Service name
    :param user: User that started the service.
    :return: PID file path
    """
    import params

    # NOTE: ``pid_dir`` is referenced by name in the second format string.
    pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
    return format("{pid_dir}/hadoop-{user}-{name}.pid")
def action_remove(self):
    """Delete the ``.list`` repository file and refresh the package index.

    Nothing happens when the repository file does not exist.
    """
    with Environment.get_instance_copy() as env:
        # NOTE: local names are kept as-is; the update command templates are
        # formatted against them.
        repo_file_name = format("{repo_file_name}.list", repo_file_name=self.resource.repo_file_name)
        repo_file_path = format("{repo_dir}/{repo_file_name}", repo_dir=self.repo_dir)

        if not os.path.isfile(repo_file_path):
            return

        File(repo_file_path, action="delete")

        # this is time expensive
        update_cmd_formatted = [format(x) for x in self.update_cmd]
        Execute(update_cmd_formatted)
def start(self, env, upgrade_type=None):
    """Configure and start the Kafka broker.

    The Ranger Kafka plugin is set up first when supported.  The start
    command is skipped (``not_if``) when the PID file exists and the recorded
    process is alive.
    """
    # NOTE: the format strings below reference ``params`` by name.
    import params
    env.set_params(params)
    self.configure(env, upgrade_type=upgrade_type)

    if params.is_supported_kafka_ranger:
        # Ranger Kafka Plugin related call
        setup_ranger_kafka()

    already_running_test = format('ls {params.kafka_pid_file} >/dev/null 2>&1 && ps -p `cat {params.kafka_pid_file}` >/dev/null 2>&1')
    start_daemon_cmd = format('source {params.conf_dir}/kafka-env.sh ; {params.kafka_bin} start')
    Execute(start_daemon_cmd,
            user=params.kafka_user,
            not_if=already_running_test)
def hive(name=None):
    """Configure Hive on Windows for the given component.

    Always writes ``hive-site.xml``.  For ``hiveserver2`` and ``metastore``
    the Windows service logon user is changed and the warehouse directory is
    created.  For ``metastore`` the schema is initialised when the schematool
    info check fails.  For ``hiveserver2`` with the Tez engine the
    ``hiveTezSetup.cmd`` script is run.

    :param name: component name (``"hiveserver2"``, ``"metastore"`` or other/None)
    """
    import params

    XmlConfig("hive-site.xml",
              conf_dir=params.hive_conf_dir,
              configurations=params.config['configurations']['hive-site'],
              owner=params.hive_user,
              configuration_attributes=params.config['configuration_attributes']['hive-site'])

    if name in ["hiveserver2", "metastore"]:
        # Manually overriding service logon user & password set by the installation package
        service_name = params.service_map[name]
        ServiceConfig(service_name,
                      action="change_user",
                      username=params.hive_user,
                      password=Script.get_password(params.hive_user))
        Execute(format("cmd /c hadoop fs -mkdir -p {hive_warehouse_dir}"), logoutput=True, user=params.hadoop_user)

    if name == 'metastore' and params.init_metastore_schema:
        check_schema_created_cmd = format(
            'cmd /c "{hive_bin}\\hive.cmd --service schematool -info '
            '-dbType {hive_metastore_db_type} '
            '-userName {hive_metastore_user_name} '
            '-passWord {hive_metastore_user_passwd!p}'
            # cmd "feature", propagate the process exit code manually
            '&set EXITCODE=%ERRORLEVEL%&exit /B %EXITCODE%"',
            hive_bin=params.hive_bin,
            hive_metastore_db_type=params.hive_metastore_db_type,
            hive_metastore_user_name=params.hive_metastore_user_name,
            hive_metastore_user_passwd=params.hive_metastore_user_passwd)
        try:
            Execute(check_schema_created_cmd)
        except Fail:
            # The info check failed, so the schema is created.
            create_schema_cmd = format(
                'cmd /c {hive_bin}\\hive.cmd --service schematool -initSchema '
                '-dbType {hive_metastore_db_type} '
                '-userName {hive_metastore_user_name} '
                '-passWord {hive_metastore_user_passwd!p}',
                hive_bin=params.hive_bin,
                hive_metastore_db_type=params.hive_metastore_db_type,
                hive_metastore_user_name=params.hive_metastore_user_name,
                hive_metastore_user_passwd=params.hive_metastore_user_passwd)
            Execute(create_schema_cmd,
                    user=params.hive_user,
                    logoutput=True)

    if name == "hiveserver2" and params.hive_execution_engine == "tez":
        # Init the tez app dir in hadoop
        script_file = __file__.replace('/', os.sep)
        files_dir = os.path.join(os.path.dirname(script_file), "..", "files")
        cmd_file = os.path.normpath(os.path.join(files_dir, "hiveTezSetup.cmd"))

        Execute("cmd /c " + cmd_file, logoutput=True, user=params.hadoop_user)
def is_namenode_formatted(params):
    """Decide whether the NameNode must be treated as already formatted.

    1. If any mark directory exists, all mark directories are (re)created and
       True is returned.
    2. Otherwise old-style markers are migrated to the new mark directories
       and removed.
    3. Finally every name directory is checked: an empty directory sets the
       result to False, while the first non-empty (or uncheckable) directory
       sets it to True and stops the scan.

    :param params: object providing ``namenode_formatted_old_mark_dirs``,
                   ``namenode_formatted_mark_dirs`` and ``dfs_name_dir``
    :return: True when the NameNode is considered formatted
    """
    old_mark_dirs = params.namenode_formatted_old_mark_dirs
    mark_dirs = params.namenode_formatted_mark_dirs
    # NOTE: ``nn_name_dirs``, ``mark_dir`` and ``name_dir`` are referenced by
    # name in the format strings below.
    nn_name_dirs = params.dfs_name_dir.split(',')
    marked = False

    # Check if name directories have been marked as formatted
    for mark_dir in mark_dirs:
        if os.path.isdir(mark_dir):
            marked = True
            print(format("{mark_dir} exists. Namenode DFS already formatted"))

    # Ensure that all mark dirs created for all name directories
    if marked:
        for mark_dir in mark_dirs:
            Directory(mark_dir, recursive=True)
        return marked

    # Move all old format markers to new place
    for old_mark_dir in old_mark_dirs:
        if os.path.isdir(old_mark_dir):
            for mark_dir in mark_dirs:
                Execute(('cp', '-ar', old_mark_dir, mark_dir), sudo=True)
                marked = True
            Directory(old_mark_dir, action="delete")
        elif os.path.isfile(old_mark_dir):
            for mark_dir in mark_dirs:
                Directory(mark_dir, recursive=True)
            Directory(old_mark_dir, action="delete")
            marked = True

    # Check if name dirs are not empty
    for name_dir in nn_name_dirs:
        try:
            Execute(format("ls {name_dir} | wc -l | grep -q ^0$"))
            marked = False
        except Exception:
            marked = True
            print(format("ERROR: Namenode directory(s) is non empty. Will not format the namenode. List of non-empty namenode dirs {nn_name_dirs}"))
            break

    return marked
def setup_java_patch(rolling_upgrade=False):
    """Run the Ranger admin ``db_setup.py -javapatch`` step.

    :param rolling_upgrade: when True, the scripts of
        ``/usr/hdp/{version}/ranger-admin`` are used instead of
        ``params.ranger_home``
    """
    import params

    # NOTE: ``ranger_home`` is referenced by name in the format string below.
    if rolling_upgrade:
        ranger_home = format("/usr/hdp/{version}/ranger-admin")
    else:
        ranger_home = params.ranger_home

    java_patch_cmd = format('python {ranger_home}/db_setup.py -javapatch')
    patch_environment = {'RANGER_ADMIN_HOME': ranger_home, 'JAVA_HOME': params.java_home}
    Execute(java_patch_cmd,
            environment=patch_environment,
            logoutput=True,
            user=params.unix_user)
def service_check(self, env):
    """Check that the Spark history server answers with HTTP 200.

    A kinit is performed first when security is enabled.  The curl probe is
    retried up to 10 times, 3 seconds apart.
    """
    import params
    env.set_params(params)

    if params.security_enabled:
        spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
        Execute(spark_kinit_cmd, user=params.spark_user)

    history_probe_cmd = format("curl -s -o /dev/null -w'%{{http_code}}' --negotiate -u: -k http://{spark_history_server_host}:{spark_history_ui_port} | grep 200")
    Execute(history_probe_cmd,
            tries=10,
            try_sleep=3,
            logoutput=True)
def remove_solr_kerberos_auth():
    """Clear the Solr security JSON node in ZooKeeper, if it exists.

    The clear command is best-effort (``ignore_failures=True``) and limited
    to 60 seconds.
    """
    import params

    if _has_security_json():
        clear_cmd = format('{zk_client_prefix} -cmd clear {solr_cloud_zk_directory}{security_json}')
        Execute(clear_cmd,
                environment={'JAVA_HOME': params.java64_home},
                timeout=60,
                ignore_failures=True,
                user=params.solr_config_user)
        return

    Logger.debug(format("Solr Security Json not found {solr_cloud_zk_directory}{security_json}"))
def setup_ranger_db(rolling_upgrade=False):
    """Install the JDBC driver and run the Ranger admin database setup scripts.

    Steps, in order:
    1. download the custom connector and copy it to ``params.driver_curl_target``;
    2. copy the connector into ``<ranger_home>/ews/lib``;
    3. apply the ``admin-properties`` configuration to ``install.properties``;
    4. run ``dba_script.py -q`` when ``params.create_db_dbuser`` is set;
    5. always run ``db_setup.py``.

    :param rolling_upgrade: when True, ``ranger_home`` is
        ``/usr/hdp/{version}/ranger-admin`` instead of ``params.ranger_home``.
        The driver copy now targets the same ``ranger_home`` as the scripts;
        previously it always went to ``params.ranger_home``.
    """
    import params

    File(params.downloaded_custom_connector,
         content=DownloadSource(params.driver_curl_source),
         mode=0o644)

    Directory(params.java_share_dir,
              mode=0o755)

    Execute(('cp', '--remove-destination', params.downloaded_custom_connector, params.driver_curl_target),
            path=["/bin", "/usr/bin/"],
            sudo=True)

    File(params.driver_curl_target, mode=0o644)

    # NOTE: ``ranger_home`` is referenced by name in the format strings below.
    ranger_home = params.ranger_home
    if rolling_upgrade:
        ranger_home = format("/usr/hdp/{version}/ranger-admin")

    # Use the effective ranger_home (not params.ranger_home) so the driver
    # lands in the same install whose scripts are executed below.
    ranger_lib_dir = os.path.join(ranger_home, 'ews', 'lib')
    Execute(('cp', '--remove-destination', params.downloaded_custom_connector, ranger_lib_dir),
            path=["/bin", "/usr/bin/"],
            sudo=True)

    File(os.path.join(ranger_lib_dir, params.jdbc_jar_name), mode=0o644)

    ModifyPropertiesFile(format("{ranger_home}/install.properties"),
                         properties=params.config['configurations']['admin-properties'],
                         owner=params.unix_user)

    # User wants us to setup the DB user and DB?
    if params.create_db_dbuser:
        Logger.info('Setting up Ranger DB and DB User')
        dba_setup = format('python {ranger_home}/dba_script.py -q')
        Execute(dba_setup,
                environment={'RANGER_ADMIN_HOME': ranger_home, 'JAVA_HOME': params.java_home},
                logoutput=True,
                user=params.unix_user)
    else:
        Logger.info('Separate DBA property not set. Assuming Ranger DB and DB User exists!')

    db_setup = format('python {ranger_home}/db_setup.py')
    Execute(db_setup,
            environment={'RANGER_ADMIN_HOME': ranger_home, 'JAVA_HOME': params.java_home},
            logoutput=True,
            user=params.unix_user)
"{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};" ) if kinit_if_needed: Execute(kinit_if_needed, user=component_user, path='/bin') source_and_dest_pairs = [ (component_tar_source_file, destination_file), ] return _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed) env.set_params(params) hadoop_conf_dir = params.hadoop_conf_dir oozie_libext_dir = format("/usr/hdp/{hdp_version}/oozie/libext") oozie_home = format("/usr/hdp/{hdp_version}/oozie") oozie_setup_sh = format("/usr/hdp/{hdp_version}/oozie/bin/oozie-setup.sh") oozie_tmp_dir = "/var/tmp/oozie" configure_cmds = [] configure_cmds.append( ('tar', '-xvf', oozie_home + '/oozie-sharelib.tar.gz', '-C', oozie_home)) configure_cmds.append(('cp', "/usr/share/HDP-oozie/ext-2.2.zip", format("/usr/hdp/{hdp_version}/oozie/libext"))) configure_cmds.append( ('chown', 'oozie:hadoop', oozie_libext_dir + "/ext-2.2.zip")) no_op_test = "ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1" File("/etc/oozie/conf/oozie-env.sh",
def setup_hiveserver2():
    """Write HiveServer2 configuration and prepare its HDFS directories/tarballs.

    Local files: the start script, ``hadoop-metrics2-hiveserver2.properties``
    and ``hiveserver2-site.xml``.

    HDFS (queued with ``create_on_execute`` and applied by the final
    ``HdfsResource(None, action="execute")``): WebHCat directories, the Hive
    warehouse directory (only when it lives in the default filesystem), the
    Hive user directory, the scratch directory (unless under ``/tmp``) and
    the replication directories when configured.

    Tarballs: mapreduce and tez are copied when the stack supports
    COPY_TARBALL_TO_HDFS; pig and hive are always copied; sqoop and
    hadoop_streaming are copied per matching source file.
    """
    import params

    File(params.start_hiveserver2_path,
         mode=0o755,
         content=Template(format('{start_hiveserver2_script}')))

    File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hiveserver2.properties"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-hiveserver2.properties.j2"),
         mode=0o600)

    XmlConfig("hiveserver2-site.xml",
              conf_dir=params.hive_server_conf_dir,
              configurations=params.config['configurations']['hiveserver2-site'],
              configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0o600)

    # copy tarball to HDFS feature not supported
    if not (params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major)):
        params.HdfsResource(params.webhcat_apps_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=0o755)

    # Create webhcat dirs.
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
        params.HdfsResource(params.hcat_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=params.hcat_hdfs_user_mode)

    params.HdfsResource(params.webhcat_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.webhcat_user,
                        mode=params.webhcat_hdfs_user_mode)

    # ****** Begin Copy Tarballs ******
    # *********************************
    # if copy tarball to HDFS feature supported copy mapreduce.tar.gz and tez.tar.gz to HDFS
    if params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major):
        copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)
        copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)

    # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
    # This can use a different source and dest location to account
    copy_to_hdfs("pig",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.pig_tar_source,
                 custom_dest_file=params.pig_tar_dest_file,
                 skip=params.sysprep_skip_copy_tarballs_hdfs)
    copy_to_hdfs("hive",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.hive_tar_source,
                 custom_dest_file=params.hive_tar_dest_file,
                 skip=params.sysprep_skip_copy_tarballs_hdfs)

    wildcard_tarballs = ["sqoop", "hadoop_streaming"]
    for tarball_name in wildcard_tarballs:
        # Attribute lookup by name; getattr replaces the former eval() call
        # and raises the same AttributeError when the attribute is missing.
        source_file_pattern = getattr(params, tarball_name + "_tar_source")
        dest_dir = getattr(params, tarball_name + "_tar_dest_dir")

        if source_file_pattern is None or dest_dir is None:
            continue

        # Only patterns containing a wildcard are expanded.
        if "*" in source_file_pattern:
            source_files = glob.glob(source_file_pattern)
        else:
            source_files = [source_file_pattern]

        for source_file in source_files:
            src_filename = os.path.basename(source_file)
            dest_file = os.path.join(dest_dir, src_filename)

            copy_to_hdfs(tarball_name,
                         params.user_group,
                         params.hdfs_user,
                         file_mode=params.tarballs_mode,
                         custom_source_file=source_file,
                         custom_dest_file=dest_file,
                         skip=params.sysprep_skip_copy_tarballs_hdfs)
    # ******* End Copy Tarballs *******
    # *********************************

    # if warehouse directory is in DFS
    if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
        # Create Hive Metastore Warehouse Dir
        params.HdfsResource(params.hive_apps_whs_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            mode=0o777)
    else:
        Logger.info(format("Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS."))

    # Create Hive User Dir
    params.HdfsResource(params.hive_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hive_user,
                        mode=params.hive_hdfs_user_mode)

    if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"):
        # Hive expects this dir to be writeable by everyone as it is used as a temp dir
        params.HdfsResource(params.hive_exec_scratchdir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            group=params.hdfs_user,
                            mode=0o777)

    if params.hive_repl_cmrootdir is not None:
        params.HdfsResource(params.hive_repl_cmrootdir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            group=params.user_group,
                            mode=0o1777)

    if params.hive_repl_rootdir is not None:
        params.HdfsResource(params.hive_repl_rootdir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            group=params.user_group,
                            mode=0o700)

    params.HdfsResource(None, action="execute")
def setup_spark(env, type, upgrade_type=None, action=None): import params Directory([params.spark_pid_dir, params.spark_log_dir], owner=params.spark_user, group=params.user_group, mode=0775, create_parents=True) if type == 'server' and action == 'config': params.HdfsResource(params.spark_history_dir, type="directory", action="create_on_execute", owner=params.spark_user, mode=0775) params.HdfsResource(params.spark_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.spark_user, mode=0775) params.HdfsResource(None, action="execute") PropertiesFile( format("{spark_conf}/spark-defaults.conf"), properties=params.config['configurations']['spark2-defaults'], key_value_delimiter=" ", owner=params.spark_user, group=params.spark_group, mode=0644) # create spark-env.sh in etc/conf dir File( os.path.join(params.spark_conf, 'spark-env.sh'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_env_sh), mode=0644, ) #create log4j.properties in etc/conf dir File( os.path.join(params.spark_conf, 'log4j.properties'), owner=params.spark_user, group=params.spark_group, content=params.spark_log4j_properties, mode=0644, ) #create metrics.properties in etc/conf dir File(os.path.join(params.spark_conf, 'metrics.properties'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_metrics_properties), mode=0644) if params.is_hive_installed: XmlConfig("hive-site.xml", conf_dir=params.spark_conf, configurations=params.spark_hive_properties, owner=params.spark_user, group=params.spark_group, mode=0644) if params.has_spark_thriftserver: PropertiesFile(params.spark_thrift_server_conf_file, properties=params.config['configurations'] ['spark2-thrift-sparkconf'], owner=params.hive_user, group=params.user_group, key_value_delimiter=" ", mode=0644) if params.spark_thrift_fairscheduler_content: # create spark-thrift-fairscheduler.xml File(os.path.join(params.spark_conf, "spark-thrift-fairscheduler.xml"), 
owner=params.spark_user, group=params.spark_group, mode=0755, content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def startRebalancingProcess(threshold, rebalance_env):
    """
    Build the HDFS balancer command line, wrapped to run as the HDFS user.

    :param threshold: value substituted after the balancer's -threshold option
    :param rebalance_env: environment mapping handed to as_user
    :return: whatever as_user returns for the wrapped command
    """
    return as_user(
        format('hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}'),
        params.hdfs_user,
        env=rebalance_env)
limitations under the License. """ import os from resource_management.libraries.script.script import Script from resource_management.libraries.functions.format import format from resource_management.libraries.functions import get_kinit_path from resource_management.libraries.functions.default import default from resource_management.libraries.functions import conf_select from resource_management.libraries.functions import stack_select config = Script.get_config() conf_file = config['configurations']['atlas-env']['metadata_conf_file'] conf_dir = os.environ[ 'METADATA_CONF'] if 'METADATA_CONF' in os.environ else '/etc/atlas/conf' pid_dir = config['configurations']['atlas-env']['metadata_pid_dir'] pid_file = format("{pid_dir}/atlas.pid") metadata_user = config['configurations']['atlas-env']['metadata_user'] # Security related/required params hostname = config['hostname'] security_enabled = config['configurations']['cluster-env']['security_enabled'] kinit_path_local = get_kinit_path( default('/configurations/kerberos-env/executable_search_paths', None)) tmp_dir = Script.get_tmp_dir() stack_name = default("/hostLevelParams/stack_name", None) hadoop_conf_dir = conf_select.get_hadoop_conf_dir() hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
def service(action=None, name=None, user=None, options="", create_pid_dir=False, create_log_dir=False): """ :param action: Either "start" or "stop" :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc" :param user: User to run the command as :param options: Additional options to pass to command as a string :param create_pid_dir: Create PID directory :param create_log_dir: Crate log file directory """ import params options = options if options else "" pid_dir = format("{hadoop_pid_dir_prefix}/{user}") pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid") hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir} log_dir = format("{hdfs_log_dir_prefix}/{user}") # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542 if name == "nfs3": pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid") custom_export = { 'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user, 'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir, 'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir } hadoop_env_exports.update(custom_export) check_process = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo( ["pgrep", "-F", pid_file]) # on STOP directories shouldn't be created # since during stop still old dirs are used (which were created during previous start) if action != "stop": if name == "nfs3": Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.root_user, group=params.root_group) else: Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.hdfs_user, group=params.user_group) if create_pid_dir: Directory(pid_dir, owner=user, recursive=True) if create_log_dir: if name == "nfs3": Directory(log_dir, mode=0775, owner=params.root_user, group=params.user_group) else: Directory(log_dir, owner=user, recursive=True) if params.security_enabled and name == "datanode": ## The directory where pid files are stored in the secure data environment. 
hadoop_secure_dn_pid_dir = format( "{hadoop_pid_dir_prefix}/{hdfs_user}") hadoop_secure_dn_pid_file = format( "{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid") # At Champlain stack and further, we may start datanode as a non-root even in secure cluster if not (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0 ) or params.secure_dn_ports_are_in_use: user = "******" pid_file = format( "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid" ) if action == 'stop' and (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) and \ os.path.isfile(hadoop_secure_dn_pid_file): # We need special handling for this case to handle the situation # when we configure non-root secure DN and then restart it # to handle new configs. Otherwise we will not be able to stop # a running instance user = "******" try: check_process_status(hadoop_secure_dn_pid_file) custom_export = {'HADOOP_SECURE_DN_USER': params.hdfs_user} hadoop_env_exports.update(custom_export) except ComponentIsNotRunning: pass hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh") if user == "root": cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name] if options: cmd += [ options, ] daemon_cmd = as_sudo(cmd) else: cmd = format( "{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}" ) if options: cmd += " " + options daemon_cmd = as_user(cmd, user) service_is_up = check_process if action == "start" else None #remove pid file from dead process File(pid_file, action="delete", not_if=check_process) Execute(daemon_cmd, not_if=service_is_up, environment=hadoop_env_exports) if action == "stop": File( pid_file, action="delete", )
def service(componentName, action='start', serviceName='yarn'):
    """
    Start or stop a YARN / MapReduce daemon, or refresh the ResourceManager queues.

    :param componentName: daemon name passed to the daemon script
    :param action: 'start', 'stop' or 'refreshQueues'; any other value only
                   computes the command prefix and does nothing else
    :param serviceName: 'mapreduce' together with componentName 'historyserver'
                        selects mr-jobhistory-daemon.sh; everything else uses
                        yarn-daemon.sh
    """
    import params

    if serviceName == 'mapreduce' and componentName == 'historyserver':
        delete_pid_file = True
        daemon = format("{mapred_bin}/mr-jobhistory-daemon.sh")
        pid_file = format("{mapred_pid_dir}/mapred-{mapred_user}-{componentName}.pid")
        run_as, daemon_log_dir = params.mapred_user, params.mapred_log_dir
    else:
        # !!! yarn-daemon.sh deletes the PID for us; if we remove it the script
        # may not work correctly when stopping the service
        delete_pid_file = False
        daemon = format("{yarn_bin}/yarn-daemon.sh")
        pid_file = format("{yarn_pid_dir}/yarn-{yarn_user}-{componentName}.pid")
        run_as, daemon_log_dir = params.yarn_user, params.yarn_log_dir

    # `cmd` and `daemon` are referenced by name inside the format strings
    # below, so these two locals must keep their names.
    cmd = format("export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {daemon} --config {hadoop_conf_dir}")

    if action == 'refreshQueues':
        rm_kinit_cmd = params.rm_kinit_cmd
        Execute(format("{rm_kinit_cmd} export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {yarn_container_bin}/yarn rmadmin -refreshQueues"),
                user=run_as)

    elif action == 'stop':
        daemon_cmd = format("{cmd} stop {componentName}")
        try:
            Execute(daemon_cmd, user=run_as)
        except:
            # Dump the daemon logs, then let the original error propagate.
            show_logs(daemon_log_dir, run_as)
            raise

        # !!! yarn-daemon doesn't need us to delete PIDs
        if delete_pid_file:
            File(pid_file, action="delete")

    elif action == 'start':
        daemon_cmd = format("{ulimit_cmd} {cmd} start {componentName}")
        # Shell test: the pid file exists and names a live process.
        process_alive_cmd = " && ".join([
            as_sudo(["test", "-f", pid_file]),
            as_sudo(["pgrep", "-F", pid_file]),
        ])

        # Remove the pid file if its corresponding process is not running.
        File(pid_file, action="delete", not_if=process_alive_cmd)

        if componentName == 'timelineserver' and serviceName == 'yarn':
            File(params.ats_leveldb_lock_file,
                 action="delete",
                 only_if=format("ls {params.ats_leveldb_lock_file}"),
                 not_if=process_alive_cmd,
                 ignore_failures=True)

        try:
            # Attempt to start the process. Internally, this is skipped if the process is already running.
            Execute(daemon_cmd, user=run_as, not_if=process_alive_cmd)

            # Ensure that the process with the expected PID exists.
            Execute(process_alive_cmd,
                    not_if=process_alive_cmd,
                    tries=5,
                    try_sleep=1)
        except:
            # Dump the daemon logs, then let the original error propagate.
            show_logs(daemon_log_dir, run_as)
            raise
def start(self, env):
    """
    Configure the component, run the port/status pre-checks and launch Solr.

    The start command is assembled from the base `solr start` invocation plus
    optional cloud, Kerberos and HDFS arguments, with output appended to the
    service log file, and is executed as the Solr user.

    :param env: execution environment; receives the params module
    """
    import params
    env.set_params(params)
    self.configure(env)

    Execute(('cp', '-f', '/etc/zookeeper/conf.dist/zoo.cfg', format('{solr_config_data_dir}')),
            sudo=True)
    Execute(('chmod', '-R', '755', format('{cloud_scripts}/zkcli.sh')),
            sudo=True)

    # The status check is only attempted when the port check passed.
    if not solr_port_validation() or not solr_status_validation():
        exit(1)

    Logger.info("Starting Solr ... ")

    # TODO use solr.in.sh to start args instead of here LWSHADOOP-648
    command_parts = [format('{solr_config_bin_dir}/solr start -h {hostname}')]

    if params.solr_cloud_mode:
        command_parts.append(format(' -cloud -z {zookeeper_hosts}{solr_cloud_zk_directory}'))
    elif params.security_enabled:
        command_parts.append(' -DauthenticationPlugin=org.apache.solr.security.KerberosPlugin')

    if params.solr_hdfs_enable:
        command_parts.extend([
            format(' -Dsolr.directoryFactory=HdfsDirectoryFactory -Dsolr.lock.type=hdfs'),
            format(' -Dsolr.hdfs.home={default_fs}{solr_hdfs_directory}'),
            format(' -Dsolr.hdfs.confdir={hadoop_conf_dir}'),
        ])
        if params.security_enabled:
            command_parts.extend([
                format(' -Dsolr.hdfs.security.kerberos.enabled=true'),
                format(' -Dsolr.hdfs.security.kerberos.keytabfile={solr_kerberos_keytab}'),
                format(' -Dsolr.hdfs.security.kerberos.principal={solr_kerberos_principal}'),
            ])

    command_parts.append(
        format(' -p {solr_config_port} -m {solr_config_memory} >> {solr_config_service_log_file} 2>&1'))

    Execute("".join(command_parts),
            environment={'JAVA_HOME': params.java64_home},
            user=params.solr_config_user)
# Command configuration and agent temp directory, both obtained from Script.
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY
security_enabled = status_params.security_enabled

# Defaults to False; overridden only when the command carries the
# top-level 'credentialStoreEnabled' key.
credential_store_enabled = False
if 'credentialStoreEnabled' in config:
    credential_store_enabled = config['credentialStoreEnabled']

# Fixed configuration locations; the keys folders live directly under each conf dir.
logsearch_server_conf = "/usr/lib/ambari-logsearch-portal/conf"
logsearch_server_keys_folder = logsearch_server_conf + "/keys"
logsearch_logfeeder_conf = "/usr/lib/ambari-logsearch-logfeeder/conf"
logsearch_logfeeder_keys_folder = logsearch_logfeeder_conf + "/keys"

logsearch_config_set_dir = format("{logsearch_server_conf}/solr_configsets")

# logsearch pid file
logsearch_pid_dir = status_params.logsearch_pid_dir
logsearch_pid_file = status_params.logsearch_pid_file

# logfeeder pid file
logfeeder_pid_dir = status_params.logfeeder_pid_dir
logfeeder_pid_file = status_params.logfeeder_pid_file

user_group = config['configurations']['cluster-env']['user_group']

# shared configs
java_home = config['ambariLevelParams']['java_home']
ambari_java_home = default("/commandParams/ambari_java_home", None)
# Prefer the JDK named in commandParams when one is supplied, otherwise
# fall back to the cluster-level java_home.
java64_home = ambari_java_home if ambari_java_home is not None else java_home
def setup_ranger_plugin(component_select_name, service_name, downloaded_custom_connector, driver_curl_source, driver_curl_target, java_home, repo_name, plugin_repo_dict, ranger_env_properties, plugin_properties, policy_user, policymgr_mgr_url, plugin_enabled, component_user, component_group, api_version=None, skip_if_rangeradmin_down=True, **kwargs): File(downloaded_custom_connector, content=DownloadSource(driver_curl_source), mode=0644) Execute(('cp', '--remove-destination', downloaded_custom_connector, driver_curl_target), path=["/bin", "/usr/bin/"], sudo=True) File(driver_curl_target, mode=0644) file_path = format( '/usr/lib/ranger-{service_name}-plugin/install.properties') if not os.path.isfile(file_path): raise Fail( format( 'Ranger {service_name} plugin install.properties file does not exist at {file_path}' )) ModifyPropertiesFile(file_path, properties=plugin_properties) custom_plugin_properties = dict() custom_plugin_properties['CUSTOM_USER'] = component_user custom_plugin_properties['CUSTOM_GROUP'] = component_group ModifyPropertiesFile(file_path, properties=custom_plugin_properties) if plugin_enabled: cmd = (format('enable-{service_name}-plugin.sh'), ) if api_version == 'v2' and api_version is not None: ranger_adm_obj = RangeradminV2( url=policymgr_mgr_url, skip_if_rangeradmin_down=skip_if_rangeradmin_down) else: ranger_adm_obj = Rangeradmin( url=policymgr_mgr_url, skip_if_rangeradmin_down=skip_if_rangeradmin_down) ranger_adm_obj.create_ranger_repository( service_name, repo_name, plugin_repo_dict, ranger_env_properties['ranger_admin_username'], ranger_env_properties['ranger_admin_password'], ranger_env_properties['admin_username'], ranger_env_properties['admin_password'], policy_user) else: cmd = (format('disable-{service_name}-plugin.sh'), ) cmd_env = { 'JAVA_HOME': java_home, 'PWD': format('/usr/lib/ranger-{service_name}-plugin'), 'PATH': format('/usr/lib/ranger-{service_name}-plugin') } Execute( cmd, environment=cmd_env, logoutput=True, sudo=True, )
def hive(name=None):
    """
    Write hive-site.xml and perform the per-component setup steps.

    :param name: component being configured. "hiveserver2" and "metastore"
                 get their service logon user changed and the warehouse
                 directory created; "metastore" additionally checks for (and
                 if needed initialises) the metastore schema; "hiveserver2"
                 runs hiveTezSetup.cmd when the execution engine is "tez".
    """
    import params

    XmlConfig("hive-site.xml",
              conf_dir=params.hive_conf_dir,
              configurations=params.config['configurations']['hive-site'],
              owner=params.hive_user,
              configuration_attributes=params.config['configuration_attributes']['hive-site'])

    if name in ("hiveserver2", "metastore"):
        # Manually overriding service logon user & password set by the installation package
        ServiceConfig(params.service_map[name],
                      action="change_user",
                      username=params.hive_user,
                      password=Script.get_password(params.hive_user))
        Execute(format("cmd /c hadoop fs -mkdir -p {hive_warehouse_dir}"),
                logoutput=True,
                user=params.hadoop_user)

    if name == 'metastore' and params.init_metastore_schema:
        # Values substituted into both schematool command lines.
        schematool_vars = dict(
            hive_bin=params.hive_bin,
            hive_metastore_db_type=params.hive_metastore_db_type,
            hive_metastore_user_name=params.hive_metastore_user_name,
            hive_metastore_user_passwd=params.hive_metastore_user_passwd)

        check_schema_created_cmd = format(
            'cmd /c "{hive_bin}\\hive.cmd --service schematool -info '
            '-dbType {hive_metastore_db_type} '
            '-userName {hive_metastore_user_name} '
            '-passWord {hive_metastore_user_passwd!p}'
            '&set EXITCODE=%ERRORLEVEL%&exit /B %EXITCODE%"',  # cmd "feature", propagate the process exit code manually
            **schematool_vars)
        try:
            Execute(check_schema_created_cmd)
        except Fail:
            # The -info probe failed, so initialise the schema.
            create_schema_cmd = format(
                'cmd /c {hive_bin}\\hive.cmd --service schematool -initSchema '
                '-dbType {hive_metastore_db_type} '
                '-userName {hive_metastore_user_name} '
                '-passWord {hive_metastore_user_passwd!p}',
                **schematool_vars)
            Execute(create_schema_cmd,
                    user=params.hive_user,
                    logoutput=True)

    if name == "hiveserver2" and params.hive_execution_engine == "tez":
        # Init the tez app dir in hadoop
        scripts_dir = os.path.dirname(__file__.replace('/', os.sep))
        tez_setup_script = os.path.normpath(
            os.path.join(scripts_dir, "..", "files", "hiveTezSetup.cmd"))
        Execute("cmd /c " + tez_setup_script,
                logoutput=True,
                user=params.hadoop_user)
# Resolve variable references in the raw local-dir setting against ams-hbase-site.
local_dir = substitute_vars(_local_dir_conf, config['configurations']['ams-hbase-site'])

# Phoenix settings, each with a fallback used when the property is absent.
phoenix_max_global_mem_percent = default('/configurations/ams-site/phoenix.query.maxGlobalMemoryPercentage', '20')
phoenix_client_spool_dir = default('/configurations/ams-site/phoenix.spool.directory', '/tmp')
phoenix_server_spool_dir = default('/configurations/ams-hbase-site/phoenix.spool.directory', '/tmp')
# Substitute vars if present
# (both the client and the server value are resolved against ams-hbase-site)
phoenix_client_spool_dir = substitute_vars(phoenix_client_spool_dir, config['configurations']['ams-hbase-site'])
phoenix_server_spool_dir = substitute_vars(phoenix_server_spool_dir, config['configurations']['ams-hbase-site'])

# JAAS configuration file paths, all under hbase_conf_dir.
client_jaas_config_file = format("{hbase_conf_dir}/hbase_client_jaas.conf")
master_jaas_config_file = format("{hbase_conf_dir}/hbase_master_jaas.conf")
regionserver_jaas_config_file = format("{hbase_conf_dir}/hbase_regionserver_jaas.conf")

rs_hosts = ["localhost"]

smoke_test_user = config['configurations']['cluster-env']['smokeuser']
smokeuser_permissions = "RWXCA"
service_check_data = functions.get_unique_id_and_date()
user_group = config['configurations']['cluster-env']["user_group"]
# NOTE(review): this value looks like a masked placeholder rather than a real
# account name - confirm the intended user before relying on it.
hadoop_user = "******"

kinit_path_local = functions.get_kinit_path(default('/configurations/kerberos-env/executable_search_paths', None))
monitor_kinit_cmd = ""
stack_root = Script.get_stack_root() # e.g. /var/lib/ambari-agent/cache/stacks/HDP/2.2/services/zeppelin-stack/package service_packagedir = os.path.realpath(__file__).split('/scripts')[0] zeppelin_dirname = 'zeppelin-server' install_dir = os.path.join(stack_root, "current") executor_mem = config['configurations']['zeppelin-env']['zeppelin.executor.mem'] executor_instances = config['configurations']['zeppelin-env'][ 'zeppelin.executor.instances'] security_enabled = config['configurations']['cluster-env']['security_enabled'] spark_jar_dir = config['configurations']['zeppelin-env']['zeppelin.spark.jar.dir'] spark_jar = format("{spark_jar_dir}/zeppelin-spark-0.5.5-SNAPSHOT.jar") setup_view = True temp_file = config['configurations']['zeppelin-env']['zeppelin.temp.file'] spark_home = "" spark_version = None spark2_home = "" spark2_version = None if 'spark-defaults' in config['configurations']: spark_home = os.path.join(stack_root, "current", 'spark-client') spark_version = extract_spark_version(spark_home) if 'spark2-defaults' in config['configurations']: spark2_home = os.path.join(stack_root, "current", 'spark2-client') spark2_version = extract_spark_version(spark2_home) # New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
def stop_zkfc_during_ru():
    """
    Kill ZKFC on this NameNode host during a rolling upgrade.

    The NameNode's HA state is queried with 'hdfs haadmin -getServiceState'
    (after a kinit when security is enabled) and ZKFC is then killed. If the
    NameNode was the active one, its state is checked once; unless that check
    reports the NameNode as already down, and provided ZKFC was actually
    killed, this waits for the NameNode to become the standby.

    :raises Fail: when the initial state query fails or returns no output
    """
    import params

    # must kinit before running the HDFS command
    if params.security_enabled:
        Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
                user=params.hdfs_user)

    check_service_cmd = format("hdfs haadmin -getServiceState {namenode_id}")
    code, out = shell.call(check_service_cmd, logoutput=True, user=params.hdfs_user)

    if not (code == 0 and out):
        raise Fail("Unable to determine NameNode HA states by calling command: {0}".format(check_service_cmd))

    if "active" in out:
        original_state = "active"
    elif "standby" in out:
        original_state = "standby"
    else:
        original_state = "unknown"
    Logger.info("Namenode service state: %s" % original_state)

    failover_note = "to initiate a failover" if original_state == "active" else ""
    Logger.info("Rolling Upgrade - Killing ZKFC on {0} NameNode host {1} {2}"
                .format(original_state, params.hostname, failover_note))

    # Forcefully kill ZKFC. If this is the active, will initiate a failover.
    # If ZKFC is already dead, then potentially this node can still be the active one.
    was_zkfc_killed = kill_zkfc(params.hdfs_user)

    # Wait until it transitions to standby
    check_standby_cmd = format("hdfs haadmin -getServiceState {namenode_id} | grep standby")

    if original_state != "active":
        return

    # process may already be down. try one time, then proceed
    # (`code` is referenced by name in the log message below)
    code, out = shell.call(check_standby_cmd, user=params.hdfs_user, logoutput=True)
    Logger.info(format("Rolling Upgrade - check for standby returned {code}"))

    if code == 255 and out:
        Logger.info("Rolling Upgrade - namenode is already down.")
    elif was_zkfc_killed:
        # Only mandate that this be the standby namenode if ZKFC was indeed killed to initiate a failover.
        Logger.info("Waiting for this NameNode to become the standby one.")
        Execute(check_standby_cmd,
                user=params.hdfs_user,
                tries=50,
                try_sleep=6,
                logoutput=True)
def metadata(type='server'):
    """
    Declare the directories, configuration files and (for the server) the
    Solr collections and helper scripts used by Atlas on this host.

    :param type: "server" additionally declares the server-only directories,
                 files and, when Solr is the search backend and infra Solr is
                 present, the Solr setup; any other value skips those steps.
    """
    import params

    # Needed by both Server and Client
    Directory(params.conf_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    if type == "server":
        Directory([params.pid_dir],
                  mode=0755,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(format('{conf_dir}/solr'),
                  mode=0755,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True,
                  recursive_ownership=True)
        Directory(params.log_dir,
                  mode=0755,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(params.data_dir,
                  mode=0644,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(params.expanded_war_dir,
                  mode=0644,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        # Place the war from metadata_home into the expanded war directory.
        File(format("{expanded_war_dir}/atlas.war"),
             content=StaticFile(format('{metadata_home}/server/webapp/atlas.war')))
        File(format("{conf_dir}/atlas-log4j.xml"),
             mode=0644,
             owner=params.metadata_user,
             group=params.user_group,
             content=InlineTemplate(params.metadata_log4j_content))
        File(format("{conf_dir}/atlas-env.sh"),
             owner=params.metadata_user,
             group=params.user_group,
             mode=0755,
             content=InlineTemplate(params.metadata_env_content))

        # These files are only re-owned / re-moded when they already exist;
        # they are not created here.
        files_to_chown = [format("{conf_dir}/policy-store.txt"), format("{conf_dir}/users-credentials.properties")]
        for file in files_to_chown:
            if os.path.exists(file):
                Execute(('chown', format('{metadata_user}:{user_group}'), file),
                        sudo=True)
                Execute(('chmod', '644', file),
                        sudo=True)

        if params.metadata_solrconfig_content:
            File(format("{conf_dir}/solr/solrconfig.xml"),
                 mode=0644,
                 owner=params.metadata_user,
                 group=params.user_group,
                 content=InlineTemplate(params.metadata_solrconfig_content))

    # Needed by both Server and Client
    PropertiesFile(format('{conf_dir}/{conf_file}'),
                   properties=params.application_properties,
                   mode=0644,
                   owner=params.metadata_user,
                   group=params.user_group)

    if params.security_enabled:
        TemplateConfig(format(params.atlas_jaas_file),
                       owner=params.metadata_user)

    if type == 'server' and params.search_backend_solr and params.has_infra_solr:
        solr_cloud_util.setup_solr_client(params.config)
        check_znode()
        # The JAAS file is only passed along on a secured cluster.
        jaasFile = params.atlas_jaas_file if params.security_enabled else None
        upload_conf_set('atlas_configs', jaasFile)

        if params.security_enabled:  # update permissions before creating the collections
            solr_cloud_util.add_solr_roles(params.config,
                                           roles=[params.infra_solr_role_atlas, params.infra_solr_role_ranger_audit, params.infra_solr_role_dev],
                                           new_service_principals=[params.atlas_jaas_principal])

        # All three collections use the 'atlas_configs' config set uploaded above.
        create_collection('vertex_index', 'atlas_configs', jaasFile)
        create_collection('edge_index', 'atlas_configs', jaasFile)
        create_collection('fulltext_index', 'atlas_configs', jaasFile)

        if params.security_enabled:
            secure_znode(format('{infra_solr_znode}/configs/atlas_configs'), jaasFile)
            secure_znode(format('{infra_solr_znode}/collections/vertex_index'), jaasFile)
            secure_znode(format('{infra_solr_znode}/collections/edge_index'), jaasFile)
            secure_znode(format('{infra_solr_znode}/collections/fulltext_index'), jaasFile)

    # Written for every value of `type`; owned by the hbase user.
    File(params.atlas_hbase_setup,
         group=params.user_group,
         owner=params.hbase_user,
         content=Template("atlas_hbase_setup.rb.j2"))

    is_atlas_upgrade_support = check_stack_feature(StackFeature.ATLAS_UPGRADE_SUPPORT, get_stack_feature_version(params.config))

    if is_atlas_upgrade_support and params.security_enabled:
        File(params.atlas_kafka_setup,
             group=params.user_group,
             owner=params.kafka_user,
             content=Template("atlas_kafka_acl.sh.j2"))

        # files required only in case if kafka broker is not present on the host as configured component
        if not params.host_with_kafka:
            File(format("{kafka_conf_dir}/kafka-env.sh"),
                 owner=params.kafka_user,
                 content=InlineTemplate(params.kafka_env_sh_template))
            File(format("{kafka_conf_dir}/kafka_jaas.conf"),
                 group=params.user_group,
                 owner=params.kafka_user,
                 content=Template("kafka_jaas.conf.j2"))

    # hdfs-site.xml is written only when the stack supports it and more than
    # one NameNode host is configured; otherwise any existing copy is deleted.
    if params.stack_supports_atlas_hdfs_site_on_namenode_ha and len(params.namenode_host) > 1:
        XmlConfig("hdfs-site.xml",
                  conf_dir=params.conf_dir,
                  configurations=params.config['configurations']['hdfs-site'],
                  configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
                  owner=params.metadata_user,
                  group=params.user_group,
                  mode=0644)
    else:
        File(format('{conf_dir}/hdfs-site.xml'), action="delete")
def rebalancehdfs(self, env):
    """
    Trigger the HDFS balancer with the threshold given in name_node_params.

    The balancer command is launched without waiting for it to finish, and
    only when hdfs_rebalance.is_balancer_running() reports that no balancer
    is already running. On a secured cluster a dedicated Kerberos credentials
    cache is prepared beforehand and its file is deleted again at the end.

    :param env: execution environment; receives the params module
    """
    import params
    env.set_params(params)

    # name_node_params is a JSON document; only its 'threshold' entry is read.
    name_node_parameters = json.loads( params.name_node_params )
    threshold = name_node_parameters['threshold']
    _print("Starting balancer with threshold = %s\n" % threshold)

    rebalance_env = {'PATH': params.hadoop_bin_dir}

    if params.security_enabled:
        # Create the kerberos credentials cache (ccache) file and set it in the environment to use
        # when executing HDFS rebalance command. Use the md5 hash of the combination of the principal and keytab file
        # to generate a (relatively) unique cache filename so that we can use it as needed.
        # TODO: params.tmp_dir=/var/lib/ambari-agent/tmp. However hdfs user doesn't have access to this path.
        # TODO: Hence using /tmp
        ccache_file_name = "hdfs_rebalance_cc_" + _md5(format("{hdfs_principal_name}|{hdfs_user_keytab}")).hexdigest()
        ccache_file_path = os.path.join(tempfile.gettempdir(), ccache_file_name)
        rebalance_env['KRB5CCNAME'] = ccache_file_path

        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        klist_cmd = format("{klist_path_local} -s {ccache_file_path}")
        kinit_cmd = format("{kinit_path_local} -c {ccache_file_path} -kt {hdfs_user_keytab} {hdfs_principal_name}")
        if shell.call(klist_cmd, user=params.hdfs_user)[0] != 0:
            Execute(kinit_cmd, user=params.hdfs_user)

    def calculateCompletePercent(first, current):
        # Returns 1.0 minus the ratio of current to initial bytesLeftToMove.
        # avoid division by zero
        try:
            division_result = current.bytesLeftToMove/first.bytesLeftToMove
        except ZeroDivisionError:
            Logger.warning("Division by zero. Bytes Left To Move = {0}. Return 1.0".format(first.bytesLeftToMove))
            return 1.0
        return 1.0 - division_result

    def startRebalancingProcess(threshold, rebalance_env):
        # Builds the balancer command line wrapped to run as the HDFS user.
        rebalanceCommand = format('hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}')
        return as_user(rebalanceCommand, params.hdfs_user, env=rebalance_env)

    command = startRebalancingProcess(threshold, rebalance_env)

    # NOTE(review): basedir is assigned here and in the DEBUG branch but is
    # not read again within this method.
    basedir = os.path.join(env.config.basedir, 'scripts')
    if(threshold == 'DEBUG'): #FIXME TODO remove this on PROD
        # Debug hook: replaces the real balancer command with the emulator script.
        basedir = os.path.join(env.config.basedir, 'scripts', 'balancer-emulator')
        command = ['ambari-python-wrap','hdfs-command.py']

    _print("Executing command %s\n" % command)

    parser = hdfs_rebalance.HdfsParser()

    # NOTE(review): handle_new_line (and with it parser and
    # calculateCompletePercent) is defined but not referenced by the code
    # below, which matches the "no longer supported" comment further down.
    def handle_new_line(line, is_stderr):
        if is_stderr:
            return

        _print('[balancer] %s' % (line))
        pl = parser.parseLine(line)
        if pl:
            res = pl.toJson()
            res['completePercent'] = calculateCompletePercent(parser.initialLine, pl)

            self.put_structured_out(res)
        elif parser.state == 'PROCESS_FINISED' :
            _print('[balancer] %s' % ('Process is finished' ))
            self.put_structured_out({'completePercent' : 1})
            return

    if (not hdfs_rebalance.is_balancer_running()):
        # As the rebalance may take a long time (hours, days) the process is triggered only
        # Tracking the progress based on the command output is no longer supported due to this
        Execute(command, wait_for_finish=False)

        _print("The rebalance process has been triggered")
    else:
        _print("There is another balancer running. This means you or another Ambari user may have triggered the "
               "operation earlier. The process may take a long time to finish (hours, even days). If the problem persists "
               "please consult with the HDFS administrators if they have triggred or killed the operation.")

    if params.security_enabled:
        # Delete the kerberos credentials cache (ccache) file
        File(ccache_file_path,
             action = "delete",
        )
def oozie_service(action='start', upgrade_type=None):
    """
    Starts or stops the Oozie service.

    On ``start`` (and only when ``upgrade_type`` is None) the JDBC driver is
    located, the database connection is verified, the Oozie schema is created,
    a kinit is performed on secured clusters and the Oozie sharelib is uploaded
    to HDFS when it is not there yet. The Oozie server is then started
    regardless of ``upgrade_type``. On ``stop`` the server is stopped and its
    pid file removed. Any other ``action`` value does nothing beyond building
    the shared commands.

    :param action: 'start' or 'stop'
    :param upgrade_type: type of upgrade, or None for a regular start; when it
      is not None the DB check, schema creation, kinit and sharelib steps are
      skipped
    :return: None
    """
    import params

    environment = {'OOZIE_CONFIG': params.conf_dir}

    if params.security_enabled:
        if params.oozie_principal is None:
            oozie_principal_with_host = 'missing_principal'
        else:
            oozie_principal_with_host = params.oozie_principal.replace("_HOST", params.hostname)
        kinit_if_needed = format("{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};")
    else:
        kinit_if_needed = ""

    # Succeeds only when the pid file exists and the recorded process is alive.
    no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)

    if action == 'start':
        start_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh")
        path_to_jdbc = params.target

        if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
           params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
           params.jdbc_driver_name == "org.postgresql.Driver" or \
           params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":

            if not params.jdbc_driver_jar:
                # Resolve the default connector jar explicitly. The previous
                # one-liner evaluated to None for a driver missing from the
                # map and then crashed inside os.path.isfile(None).
                if params.jdbc_driver_name in params.default_connectors_map:
                    default_jar_name = params.default_connectors_map[params.jdbc_driver_name]
                    path_to_jdbc = format("{oozie_libext_dir}/") + default_jar_name
                else:
                    default_jar_name = None
                    path_to_jdbc = None

                if path_to_jdbc is None or not os.path.isfile(path_to_jdbc):
                    # Fall back to every jar in libext and tell the operator how to fix it.
                    path_to_jdbc = format("{oozie_libext_dir}/") + "*"
                    missing_jar_name = default_jar_name if default_jar_name is not None else "<unknown>"
                    error_message = "Error! Sorry, but we can't find jdbc driver with default name " + missing_jar_name + \
                        " in oozie lib dir. So, db connection check can fail. Please run 'ambari-server setup --jdbc-db={db_name} --jdbc-driver={path_to_jdbc} on server host.'"
                    Logger.error(error_message)

            db_connection_check_command = format("{java_home}/bin/java -cp {check_db_connection_jar}:{path_to_jdbc} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}")
        else:
            db_connection_check_command = None

        if upgrade_type is None:
            if not os.path.isfile(path_to_jdbc) and params.jdbc_driver_name == "org.postgresql.Driver":
                print(format("ERROR: jdbc file {target} is unavailable. Please, follow next steps:\n" \
                             "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \
                             "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \
                             "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \
                             "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n"))
                exit(1)

            if db_connection_check_command:
                sudo.chmod(params.check_db_connection_jar, 0o755)
                Execute(db_connection_check_command,
                        tries=5,
                        try_sleep=10,
                        user=params.oozie_user,
                )

            # Schema creation is best effort and skipped while Oozie is running.
            Execute(format("cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run"),
                    user=params.oozie_user,
                    not_if=no_op_test,
                    ignore_failures=True
            )

            if params.security_enabled:
                Execute(kinit_if_needed,
                        user=params.oozie_user,
                )

            if params.sysprep_skip_copy_oozie_share_lib_to_hdfs:
                Logger.info("Skipping creation of oozie sharelib as host is sys prepped")
                # Copy current hive-site to hdfs:/user/oozie/share/lib/spark/
                params.HdfsResource(format("{hdfs_share_dir}/lib/spark/hive-site.xml"),
                                    action="create_on_execute",
                                    type='file',
                                    mode=0o444,
                                    owner=params.oozie_user,
                                    group=params.user_group,
                                    source=format("{hive_conf_dir}/hive-site.xml"),
                )
                params.HdfsResource(None, action="execute")

                hdfs_share_dir_exists = True  # skip time-expensive hadoop fs -ls check
            elif WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
                # check with webhdfs is much faster than executing hadoop fs -ls.
                util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled)
                list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False)
                hdfs_share_dir_exists = ('FileStatus' in list_status)
            else:
                # have to do time expensive hadoop fs -ls check.
                # The awk script exits with 0 when a "share" entry is listed, so
                # the directory exists exactly when the exit code is 0. Storing
                # the raw exit code (as before) inverted the check below.
                hdfs_share_dir_exists = shell.call(format("{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'"),
                                                   user=params.oozie_user)[0] == 0

            if not hdfs_share_dir_exists:
                Execute(params.put_shared_lib_to_hdfs_cmd,
                        user=params.oozie_user,
                        path=params.execute_path
                )
                params.HdfsResource(format("{oozie_hdfs_user_dir}/share"),
                                    type="directory",
                                    action="create_on_execute",
                                    mode=0o755,
                                    recursive_chmod=True,
                )
                params.HdfsResource(None, action="execute")

        try:
            # start oozie
            Execute(start_cmd,
                    environment=environment,
                    user=params.oozie_user,
                    not_if=no_op_test
            )

            copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type, params.upgrade_direction)
        except:
            # Surface the service logs before propagating the original failure.
            show_logs(params.oozie_log_dir, params.oozie_user)
            raise

    elif action == 'stop':
        Directory(params.oozie_tmp_dir,
                  owner=params.oozie_user,
                  create_parents=True,
        )

        stop_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozied.sh stop 60 -force")

        try:
            # stop oozie
            Execute(stop_cmd,
                    environment=environment,
                    only_if=no_op_test,
                    user=params.oozie_user
            )
        except:
            show_logs(params.oozie_log_dir, params.oozie_user)
            raise

        File(params.pid_file, action="delete")
at_split = principal.split('@') return at_split[0] # config object that holds the configurations declared in the -site.xml file config = Script.get_config() tmp_dir = Script.get_tmp_dir() sudo = AMBARI_SUDO_BINARY security_enabled = status_params.security_enabled logsearch_server_conf = "/etc/ambari-logsearch-portal/conf" logsearch_server_keys_folder = logsearch_server_conf + "/keys" logsearch_logfeeder_conf = "/etc/ambari-logsearch-logfeeder/conf" logsearch_config_set_dir = format("{logsearch_server_conf}/solr_configsets") # logsearch pid file logsearch_pid_dir = status_params.logsearch_pid_dir logsearch_pid_file = status_params.logsearch_pid_file # logfeeder pid file logfeeder_pid_dir = status_params.logfeeder_pid_dir logfeeder_pid_file = status_params.logfeeder_pid_file user_group = config['configurations']['cluster-env']['user_group'] # shared configs java64_home = config['hostLevelParams']['java_home'] cluster_name = str(config['clusterName'])
stack_supports_ranger_hive_jdbc_url_change = check_stack_feature( StackFeature.RANGER_HIVE_PLUGIN_JDBC_URL, version_for_stack_feature_checks) stack_supports_atlas_hook_for_hive_interactive = check_stack_feature( StackFeature.HIVE_INTERACTIVE_ATLAS_HOOK_REQUIRED, version_for_stack_feature_checks) stack_supports_hive_interactive_ga = check_stack_feature( StackFeature.HIVE_INTERACTIVE_GA_SUPPORT, version_for_stack_feature_checks) # component ROLE directory (like hive-metastore or hive-server2-hive2) component_directory = status_params.component_directory component_directory_interactive = status_params.component_directory_interactive hadoop_home = stack_select.get_hadoop_dir("home") hadoop_lib_home = stack_select.get_hadoop_dir("lib") hive_bin = format('{stack_root}/current/{component_directory}/bin') hive_cmd = os.path.join(hive_bin, "hive") hive_schematool_ver_bin = format('{stack_root}/{version}/hive/bin') hive_schematool_bin = format('{stack_root}/current/{component_directory}/bin') hive_lib = format('{stack_root}/current/{component_directory}/lib') hive_version_lib = format('{stack_root}/{version}/hive/lib') hive_var_lib = '/var/lib/hive' hive_user_home_dir = "/home/hive" # starting on stacks where HSI is supported, we need to begin using the 'hive2' schematool hive_server2_hive2_dir = None hive_server2_hive2_lib = None if check_stack_feature(StackFeature.HIVE_SERVER_INTERACTIVE, version_for_stack_feature_checks): # the name of the hiveserver2-hive2 component
def fill_conf_dir(component_conf_dir):
    """
    Populate one Hive configuration directory with its generated files.

    Files get client permissions (0644, directory 0755) when the directory
    resolves to the stack's current client conf directory, and restricted
    permissions (0600, directory 0700) otherwise.

    :param component_conf_dir: configuration directory to create and fill;
      it is resolved with ``os.path.realpath`` before use.
    """
    import params

    hive_client_conf_path = os.path.realpath(
        format("{stack_root}/current/{component_directory}/conf"))
    component_conf_dir = os.path.realpath(component_conf_dir)

    is_client_conf = component_conf_dir == hive_client_conf_path
    mode_identified_for_file = 0o644 if is_client_conf else 0o600
    mode_identified_for_dir = 0o755 if is_client_conf else 0o700

    Directory(component_conf_dir,
              owner=params.hive_user,
              group=params.user_group,
              create_parents=True,
              mode=mode_identified_for_dir)

    XmlConfig("mapred-site.xml",
              conf_dir=component_conf_dir,
              configurations=params.config['configurations']['mapred-site'],
              configuration_attributes=params.config['configuration_attributes']['mapred-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=mode_identified_for_file)

    File(format("{component_conf_dir}/hive-default.xml.template"),
         owner=params.hive_user,
         group=params.user_group,
         mode=mode_identified_for_file)

    File(format("{component_conf_dir}/hive-env.sh.template"),
         owner=params.hive_user,
         group=params.user_group,
         mode=mode_identified_for_file)

    # Create hive-log4j.properties and hive-exec-log4j.properties
    # in /etc/hive/conf and not in /etc/hive2/conf
    if params.log4j_version == '1':
        log4j_exec_filename = 'hive-exec-log4j.properties'
        # The template path has to go through format(): testing the raw
        # "{component_conf_dir}/..." literal never matched a real file, which
        # made the template fallback unreachable.
        log4j_exec_template = format("{component_conf_dir}/{log4j_exec_filename}.template")
        if params.log4j_exec_props is not None:
            File(format("{component_conf_dir}/{log4j_exec_filename}"),
                 mode=mode_identified_for_file,
                 group=params.user_group,
                 owner=params.hive_user,
                 content=InlineTemplate(params.log4j_exec_props))
        elif os.path.exists(log4j_exec_template):
            File(format("{component_conf_dir}/{log4j_exec_filename}"),
                 mode=mode_identified_for_file,
                 group=params.user_group,
                 owner=params.hive_user,
                 content=StaticFile(log4j_exec_template))

        log4j_filename = 'hive-log4j.properties'
        log4j_template = format("{component_conf_dir}/{log4j_filename}.template")
        if params.log4j_props is not None:
            File(format("{component_conf_dir}/{log4j_filename}"),
                 mode=mode_identified_for_file,
                 group=params.user_group,
                 owner=params.hive_user,
                 content=InlineTemplate(params.log4j_props))
        elif os.path.exists(log4j_template):
            File(format("{component_conf_dir}/{log4j_filename}"),
                 mode=mode_identified_for_file,
                 group=params.user_group,
                 owner=params.hive_user,
                 content=StaticFile(log4j_template))

    if params.parquet_logging_properties is not None:
        File(format("{component_conf_dir}/parquet-logging.properties"),
             mode=mode_identified_for_file,
             group=params.user_group,
             owner=params.hive_user,
             content=params.parquet_logging_properties)
def setup_atlas_hive(configuration_directory=None):
    """
    Resolve the Hive configuration directory used for the Atlas setup.

    Nothing happens unless ``params.has_atlas`` is set. When no directory is
    passed in, the one named by ``hive_config_dir`` is used. As visible here
    the resolved directory is not used any further.

    :param configuration_directory: directory to use, or None for the default
    """
    import params

    if not params.has_atlas:
        return

    if configuration_directory is None:
        configuration_directory = format("{hive_config_dir}")
from ambari_commons.constants import AMBARI_SUDO_BINARY

# Command configuration this parameter module reads its values from.
config = Script.get_config()

# Fixed configuration directories of the metrics collector and monitor.
ams_collector_conf_dir = "/etc/ambari-metrics-collector/conf"
ams_monitor_conf_dir = "/etc/ambari-metrics-monitor/conf/"

ams_user = config['configurations']['ams-env']['ambari_metrics_user']

#RPM versioning support
rpm_version = default("/configurations/hadoop-env/rpm_version", None)

#hadoop params
# NOTE(review): this branch only re-reads the value that was read just above,
# so it has no visible effect; the extent of the branch body is reconstructed
# from a flattened source - confirm against the original file.
if rpm_version is not None:
    #RPM versioning support
    rpm_version = default("/configurations/hadoop-env/rpm_version", None)

# Locations inside the embedded ams-hbase installation.
hadoop_native_lib = format("/usr/lib/ams-hbase/lib/hadoop-native")
hadoop_bin_dir = "/usr/bin"
daemon_script = "/usr/lib/ams-hbase/bin/hbase-daemon.sh"
region_mover = "/usr/lib/ams-hbase/bin/region_mover.rb"
region_drainer = "/usr/lib/ams-hbase/bin/draining_servers.rb"
hbase_cmd = "/usr/lib/ams-hbase/bin/hbase"

hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hbase_conf_dir = "/etc/ams-hbase/conf"

limits_conf_dir = "/etc/security/limits.d"
sudo = AMBARI_SUDO_BINARY

# Empty string when the command does not carry a dfs_type parameter.
dfs_type = default("/commandParams/dfs_type", "")
def create_ranger_repository(self, component, repo_name, repo_properties,
                             ambari_ranger_admin, ambari_ranger_password,
                             admin_uname, admin_password, policy_user):
    """
    Create the Ranger repository for a component unless it already exists.

    The Ranger login URL is probed first; nothing else happens unless it
    answers with HTTP 200. The Ambari admin user is then created through
    ``create_ambari_admin_user`` and its credentials are used to look the
    repository up and, when it is missing, to create it. Outcomes are logged;
    nothing is returned.

    :param component: component name, title-cased in the log messages
    :param repo_name: name of the repository to look up
    :param repo_properties: repository definition, serialised to JSON
    :param ambari_ranger_admin: Ambari admin user name for Ranger
    :param ambari_ranger_password: password of that user
    :param admin_uname: Ranger admin user name used to create the Ambari user
    :param admin_password: password of the Ranger admin user
    :param policy_user: not used by this method
    """
    login_status, _login_body = self.check_ranger_login_urllib2(self.url_login, 'test:test')
    repo_data = json.dumps(repo_properties)

    if login_status is None or login_status != 200:
        return

    ambari_ranger_admin, ambari_ranger_password = self.create_ambari_admin_user(
        ambari_ranger_admin, ambari_ranger_password,
        format("{admin_uname}:{admin_password}"))
    ambari_username_password_for_ranger = ambari_ranger_admin + ':' + ambari_ranger_password

    if ambari_ranger_admin == '' or ambari_ranger_password == '':
        Logger.error('Ambari admin username and password are blank ')
        return

    existing_repo = self.get_repository_by_name_urllib2(
        repo_name, component, 'true', ambari_username_password_for_ranger)
    if existing_repo and existing_repo['name'] == repo_name:
        Logger.info('{0} Repository exist'.format(component.title()))
        return

    created = self.create_repository_urllib2(repo_data, ambari_username_password_for_ranger)
    if created is None:
        Logger.error('{0} Repository creation failed in Ranger admin'.format(component.title()))
    else:
        Logger.info('{0} Repository created in Ranger admin'.format(component.title()))
def action_remove(self):
    """
    Delete the ``.repo`` file that belongs to this resource.

    The file is ``<repo_dir>/<repo_file_name>.repo``, where the directory
    comes from ``get_repo_dir()`` and the file name from the resource.
    """
    with Environment.get_instance_copy() as env:
        repo_file_name = self.resource.repo_file_name
        repo_dir = get_repo_dir()

        # NOTE(review): the placeholders match the two locals above; presumably
        # format() resolves them from this scope - confirm before renaming.
        File(format("{repo_dir}/{repo_file_name}.repo"),
             action="delete")
def setup_ranger_audit_solr():
    """
    Prepare Solr for Ranger audits.

    Writes the Solr JAAS file on kerberized stacks, uploads the Ranger
    config set to ZooKeeper, registers Solr roles for the Ranger admin and
    plugin principals, creates the audit collection and secures the related
    znodes. An ``ExecutionFailed`` raised by any of these steps is logged
    rather than propagated.
    """
    import params

    if params.security_enabled and params.stack_supports_ranger_kerberos:
        if params.solr_jaas_file is not None:
            File(format("{solr_jaas_file}"),
                 content=Template("ranger_solr_jaas_conf.j2"),
                 owner=params.unix_user)

    try:
        check_znode()

        if params.stack_supports_ranger_solr_configs:
            Logger.info('Solr configrations supported,creating solr-configurations.')
            File(format("{ranger_solr_conf}/solrconfig.xml"),
                 content=InlineTemplate(params.ranger_solr_config_content),
                 owner=params.unix_user,
                 group=params.unix_group,
                 mode=0o644)

            solr_cloud_util.upload_configuration_to_zk(
                zookeeper_quorum=params.zookeeper_quorum,
                solr_znode=params.solr_znode,
                config_set=params.ranger_solr_config_set,
                config_set_dir=params.ranger_solr_conf,
                tmp_dir=params.tmp_dir,
                java64_home=params.ambari_java_home,
                solrconfig_content=InlineTemplate(params.ranger_solr_config_content),
                jaas_file=params.solr_jaas_file,
                retry=30,
                interval=5)
        else:
            Logger.info('Solr configrations not supported, skipping solr-configurations.')
            solr_cloud_util.upload_configuration_to_zk(
                zookeeper_quorum=params.zookeeper_quorum,
                solr_znode=params.solr_znode,
                config_set=params.ranger_solr_config_set,
                config_set_dir=params.ranger_solr_conf,
                tmp_dir=params.tmp_dir,
                java64_home=params.ambari_java_home,
                jaas_file=params.solr_jaas_file,
                retry=30,
                interval=5)

        if params.security_enabled and params.has_infra_solr \
                and not params.is_external_solrCloud_enabled and params.stack_supports_ranger_kerberos:
            solr_cloud_util.add_solr_roles(
                params.config,
                roles=[
                    params.infra_solr_role_ranger_admin,
                    params.infra_solr_role_ranger_audit,
                    params.infra_solr_role_dev
                ],
                new_service_principals=[params.ranger_admin_jaas_principal])

            # (service name, default principal) pairs. The YARN entry used to
            # be spelled 'yanr'.
            # NOTE(review): presumably the service name selects that service's
            # Ranger audit configuration, so the misspelling meant a configured
            # YARN principal could never be found - confirm against
            # get_ranger_plugin_principals.
            service_default_principals_map = [('hdfs', 'nn'),
                                              ('hbase', 'hbase'),
                                              ('hive', 'hive'),
                                              ('kafka', 'kafka'),
                                              ('kms', 'rangerkms'),
                                              ('knox', 'knox'),
                                              ('nifi', 'nifi'),
                                              ('storm', 'storm'),
                                              ('yarn', 'yarn')]
            service_principals = get_ranger_plugin_principals(
                service_default_principals_map)
            solr_cloud_util.add_solr_roles(
                params.config,
                roles=[
                    params.infra_solr_role_ranger_audit,
                    params.infra_solr_role_dev
                ],
                new_service_principals=service_principals)

        solr_cloud_util.create_collection(
            zookeeper_quorum=params.zookeeper_quorum,
            solr_znode=params.solr_znode,
            collection=params.ranger_solr_collection_name,
            config_set=params.ranger_solr_config_set,
            java64_home=params.ambari_java_home,
            shards=params.ranger_solr_shards,
            replication_factor=int(params.replication_factor),
            jaas_file=params.solr_jaas_file)

        if params.security_enabled and params.has_infra_solr \
                and not params.is_external_solrCloud_enabled and params.stack_supports_ranger_kerberos:
            secure_znode(
                format('{solr_znode}/configs/{ranger_solr_config_set}'),
                params.solr_jaas_file)
            secure_znode(
                format('{solr_znode}/collections/{ranger_solr_collection_name}'),
                params.solr_jaas_file)
    except ExecutionFailed as execution_exception:
        # Best effort: a Solr/ZooKeeper outage is reported but does not fail the caller.
        Logger.error(
            'Error when configuring Solr for Ranger, Kindly check Solr/Zookeeper services to be up and running:\n {0}'
            .format(execution_exception))
def hive_service(name, action='start', upgrade_type=None):
    """
    Start or stop a Hive daemon.

    :param name: 'metastore' or 'hiveserver2'; selects the pid file and the
      start command line
    :param action: 'start' or 'stop'
    :param upgrade_type: when equal to UPGRADE_TYPE_ROLLING the "already
      running" guard on the start command is dropped
    """
    import params

    # NOTE(review): for any other value of name, pid_file and cmd are never
    # assigned and the pid lookup below fails with an unbound-name error.
    if name == 'metastore':
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        cmd = format(
            "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == 'hiveserver2':
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        cmd = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )

        # Explicit kinit for the HiveServer2 keytab, only on secured clusters
        # whose current version starts with 2.2.3 or 2.2.4.
        # NOTE(review): nesting under the hiveserver2 branch is reconstructed
        # from a flattened source - confirm.
        if params.security_enabled and params.current_version != None and (
                params.current_version.startswith("2.2.4") or
                params.current_version.startswith("2.2.3")):
            hive_kinit_cmd = format(
                "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
            )
            Execute(hive_kinit_cmd, user=params.hive_user)

    # Element [1] of the call result is used as the pid read from the pid file.
    pid = get_user_call_output.get_user_call_output(format("cat {pid_file}"),
                                                    user=params.hive_user,
                                                    is_checked_call=False)[1]
    # Shell test that succeeds only when the pid file exists and the process is alive.
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root()

        daemon_cmd = cmd
        hadoop_home = params.hadoop_home
        hive_bin = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

            # NOTE(review): nesting of this version override under the
            # rolling-upgrade branch is reconstructed - confirm.
            if params.version:
                import os
                hadoop_home = format("/usr/hdp/{version}/hadoop")
                hive_bin = os.path.join(params.hive_bin, hive_bin)

        Execute(daemon_cmd,
                user=params.hive_user,
                environment={
                    'HADOOP_HOME': hadoop_home,
                    'JAVA_HOME': params.java64_home,
                    'HIVE_BIN': hive_bin
                },
                path=params.execute_path,
                not_if=process_id_exists_command)

        # For the listed JDBC drivers, verify that the metastore database is reachable.
        if params.hive_jdbc_driver == "com.mysql.jdbc.Driver" or \
           params.hive_jdbc_driver == "org.postgresql.Driver" or \
           params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver":
            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
            )
            Execute(db_connection_check_command,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                    tries=5,
                    try_sleep=10)
    elif action == 'stop':
        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")

        # Polite kill first, skipped when the process is already gone.
        Execute(daemon_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        # Hard kill only if the process is still alive after wait_time seconds.
        wait_time = 5
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ))

        # check if stopped the process, else fail the task
        Execute(
            format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
        )

        File(pid_file, action="delete")
# Database settings read from the kms-properties configuration.
db_host = config['configurations']['kms-properties']['db_host']
db_name = config['configurations']['kms-properties']['db_name']
db_user = config['configurations']['kms-properties']['db_user']
db_password = unicode(
    config['configurations']['kms-properties']['db_password'])
kms_master_key_password = unicode(
    config['configurations']['kms-properties']['KMS_MASTER_KEY_PASSWD'])

# Credential store path and aliases read from dbks-site.
credential_provider_path = config['configurations']['dbks-site'][
    'ranger.ks.jpa.jdbc.credential.provider.path']
jdbc_alias = config['configurations']['dbks-site'][
    'ranger.ks.jpa.jdbc.credential.alias']
masterkey_alias = config['configurations']['dbks-site'][
    'ranger.ks.masterkey.credential.alias']

# Repository name is the cluster name with a "_kms" suffix.
repo_name = str(config['clusterName']) + '_kms'
cred_lib_path = os.path.join(kms_home, "cred", "lib", "*")
# Leading arguments of the credential helper invocation (script, -l, library path).
cred_setup_prefix = (format('{kms_home}/ranger_credential_helper.py'), '-l',
                     cred_lib_path)
credential_file = format('/etc/ranger/{repo_name}/cred.jceks')

# Ranger admin (audit database) settings are only read when a Ranger admin is present.
if has_ranger_admin:
    policymgr_mgr_url = config['configurations']['admin-properties'][
        'policymgr_external_url']
    xa_audit_db_flavor = (
        config['configurations']['admin-properties']['DB_FLAVOR']).lower()
    xa_audit_db_name = config['configurations']['admin-properties'][
        'audit_db_name']
    xa_audit_db_user = config['configurations']['admin-properties'][
        'audit_db_user']
    xa_audit_db_password = config['configurations']['admin-properties'][
        'audit_db_password']
    xa_db_host = config['configurations']['admin-properties']['db_host']
# alluxio underfs address underfs_addr = config['configurations']['alluxio-env']['alluxio.underfs.address'] # alluxio worker memory alotment worker_mem = config['configurations']['alluxio-env']['alluxio.worker.memory'] # Find current stack and version to push agent files to stack_name = default("/hostLevelParams/stack_name", None) stack_version = config['hostLevelParams']['stack_version'] # hadoop params namenode_address = None if 'dfs.namenode.rpc-address' in config['configurations']['hdfs-site']: namenode_rpcaddress = config['configurations']['hdfs-site']['dfs.namenode.rpc-address'] namenode_address = format("hdfs://{namenode_rpcaddress}") else: namenode_address = config['configurations']['core-site']['fs.defaultFS'] host_name = config['hostname']; alluxio_master = '#alluxio.master.hostname=' + host_name alluxio_master_web_port = 'alluxio.master.web.port=' + config['configurations']['alluxio-env']['alluxio.master.web.port'] # HA enabled_ha = 'alluxio.zookeeper.enabled=false' zk_addr = '#alluxio.zookeeper.address=' + config['configurations']['alluxio-env']['alluxio.zookeeper.address'] journal_folder = 'alluxio.master.journal.folder=' + config['configurations']['alluxio-env']['alluxio.master.journal.folder'] worker_timeout = 'alluxio.worker.block.heartbeat.timeout.ms=120000' if len(alluxio_masters) > 1: enabled_ha = 'alluxio.zookeeper.enabled=true'
def jdbc_connector(target, hive_previous_jdbc_jar):
    """
    Install the JDBC driver jar used by Hive at ``target``.

    Shared by Hive Batch, Hive Metastore, and Hive Interactive. Returns
    immediately when no jar name is configured. For an existing external
    database with a known driver, the custom connector is downloaded and
    installed; otherwise the default jar from ``/usr/share/java`` is copied.
    The target file is finally set to mode 0644.

    :param target: destination path of the jdbc jar for the calling component
    :param hive_previous_jdbc_jar: path of a previously installed jar, which
      is deleted when it exists (custom-connector case only)
    """
    import params

    if not params.jdbc_jar_name:
        return

    uses_custom_connector = (params.hive_jdbc_driver in params.hive_jdbc_drivers_list
                             and params.hive_use_existing_db)

    if not uses_custom_connector:
        # Default Hive database (MySQL): take the jar shipped in /usr/share/java.
        # TODO: add creates=target once ranger_hive_plugin no longer provides jdbc
        Execute(('cp', '--remove-destination',
                 format('/usr/share/java/{jdbc_jar_name}'), target),
                path=["/bin", "/usr/bin/"],
                sudo=True)
    else:
        environment = {"no_proxy": format("{ambari_server_hostname}")}

        if hive_previous_jdbc_jar and os.path.isfile(hive_previous_jdbc_jar):
            File(hive_previous_jdbc_jar, action='delete')

        # TODO: should be removed after ranger_hive_plugin will not provide jdbc
        if params.prepackaged_jdbc_name != params.jdbc_jar_name:
            Execute(('rm', '-f', params.prepackaged_ojdbc_symlink),
                    path=["/bin", "/usr/bin/"],
                    sudo=True)

        File(params.downloaded_custom_connector,
             content=DownloadSource(params.driver_curl_source))

        # It may be more correct to decide this on the database type.
        if not params.sqla_db_used:
            # TODO: add creates=target once ranger_hive_plugin no longer provides jdbc
            Execute(('cp', '--remove-destination',
                     params.downloaded_custom_connector, target),
                    path=["/bin", "/usr/bin/"],
                    sudo=True)
        else:
            # The SQL Anywhere driver ships as an archive: unpack it, then copy
            # the jars and the native libraries into place.
            untar_sqla_type2_driver = ('tar', '-xvf',
                                       params.downloaded_custom_connector,
                                       '-C', params.tmp_dir)
            Execute(untar_sqla_type2_driver, sudo=True)

            Execute(format("yes | {sudo} cp {jars_path_in_archive} {hive_lib}"))

            Directory(params.jdbc_libs_dir, create_parents=True)

            Execute(format("yes | {sudo} cp {libs_path_in_archive} {jdbc_libs_dir}"))

            Execute(format("{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"))

    File(target, mode=0o644)
def hive(name=None):
    """
    Lay down the Hive configuration for a component.

    Fills every directory in ``params.hive_conf_dirs_list``, writes
    ``hive-site.xml``, the optional Atlas hook properties, ``hive-env.sh``,
    the limits file, the ZooKeeper migrator JAAS file (secured clusters only)
    and the DB connection check jar, then runs the component-specific setup.

    :param name: component being configured; 'client' skips the non-client
      setup, 'hiveserver2' and 'metastore' get their own additional setup
    """
    import params

    install_lzo_if_needed()

    # Permissions 644 for conf dir (client) files, and 600 for conf.server
    client_conf_dir = format("{stack_root}/current/{component_directory}/conf")
    if params.hive_config_dir == client_conf_dir:
        conf_file_mode = 0o644
    else:
        conf_file_mode = 0o600

    Directory(params.hive_etc_dir_prefix, mode=0o755)

    # Configurations are written for the client as well as for the server,
    # because stale-configs are tracked per service, not per component.
    Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
    for directory in params.hive_conf_dirs_list:
        fill_conf_dir(directory)

    jceks_path = os.path.join(params.hive_conf_dir, 'hive-site.jceks')
    params.hive_site_config = update_credential_provider_path(
        params.hive_site_config,
        'hive-site',
        jceks_path,
        params.hive_user,
        params.user_group)

    XmlConfig("hive-site.xml",
              conf_dir=params.hive_config_dir,
              configurations=params.hive_site_config,
              configuration_attributes=params.config['configuration_attributes']['hive-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=conf_file_mode)

    # Generate atlas-application.properties.xml file
    if params.enable_atlas_hook:
        hook_properties_path = os.path.join(params.hive_config_dir,
                                            params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.HIVE,
                         params.hive_atlas_application_properties,
                         hook_properties_path,
                         params.hive_user,
                         params.user_group)

    File(format("{hive_config_dir}/hive-env.sh"),
         owner=params.hive_user,
         group=params.user_group,
         content=InlineTemplate(params.hive_env_sh_template),
         mode=conf_file_mode)

    # The limits directory may be missing on some OSes, so create it before
    # writing into it.
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    limits_file = os.path.join(params.limits_conf_dir, 'hive.conf')
    File(limits_file,
         owner='root',
         group='root',
         mode=0o644,
         content=Template("hive.conf.j2"))

    if params.security_enabled:
        zk_jaas_file = os.path.join(params.hive_config_dir, 'zkmigrator_jaas.conf')
        File(zk_jaas_file,
             owner=params.hive_user,
             group=params.user_group,
             content=Template("zkmigrator_jaas.conf.j2"))

    File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
         content=DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
         mode=0o644)

    if name != "client":
        setup_non_client()

    if name == 'hiveserver2':
        setup_hiveserver2()
    elif name == 'metastore':
        setup_metastore()