def create_repo(url, data, usernamepassword):
  try:
    base_url = url + '/service/public/v2/api/service'
    base64string = base64.encodestring('{0}'.format(usernamepassword)).replace('\n', '')
    headers = {
      'Accept': 'application/json',
      "Content-Type": "application/json"
    }
    request = urllib2.Request(base_url, data, headers)
    request.add_header("Authorization", "Basic {0}".format(base64string))
    result = urllib2.urlopen(request, timeout=20)
    response_code = result.getcode()
    response = json.loads(json.JSONEncoder().encode(result.read()))
    if response_code == 200:
      Logger.info('Repository created successfully')
      return True
    else:
      Logger.info('Repository not created')
      return False
  except urllib2.URLError as e:
    if isinstance(e, urllib2.HTTPError):
      Logger.error("Error creating service. Http status code - {0}. \n {1}".format(e.code, e.read()))
    else:
      Logger.error("Error creating service. Reason - {0}.".format(e.reason))
    return False
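# Hypothetical usage sketch for create_repo() above. The Ranger Admin URL,
# payload fields, and credentials below are illustrative assumptions, not
# values taken from any real deployment.
repo_payload = json.dumps({
  'name': 'cluster1_hadoop',
  'repositoryType': 'hdfs',
  'isActive': 'true'
})
if create_repo('http://ranger-admin.example.com:6080', repo_payload, 'admin:admin'):
  Logger.info('Repository bootstrap complete')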
def install(self, env): Logger.info("install mysql cluster management") import params env.set_params(params) # remove_mysql_cmd = "yum remove mysql -y" # Execute(remove_mysql_cmd, # user='******' # ) remove_mysql_lib_cmd = "rpm -e --nodeps mysql-libs-5.1.71-1.el6.x86_64 >/dev/null 2>&1" Execute(remove_mysql_lib_cmd, user='******' ) install_server_cmd = "yum -y install MySQL-Cluster-server-gpl-7.4.11-1.el6.x86_64" Execute(install_server_cmd, user='******' ) # create mgm config dir Directory(params.mgm_config_path, owner='root', group='root', recursive=True ) # create cluster config Logger.info('create config.ini in /var/lib/mysql-cluster') File(format("{mgm_config_path}/config.ini"), owner = 'root', group = 'root', mode = 0644, content=InlineTemplate(params.config_ini_template) ) Execute('ndb_mgmd -f /var/lib/mysql-cluster/config.ini --initial >/dev/null 2>&1', logoutput = True) # create pid file pid_cmd = "pgrep -o -f ^ndb_mgmd.* > {0}".format(params.mgm_pid_file) Execute(pid_cmd, logoutput=True)
def remove_package(self, name):
  if self._check_existence(name):
    cmd = REMOVE_CMD % (name)
    Logger.info("Removing package %s ('%s')" % (name, cmd))
    shell.checked_call(cmd)
  else:
    Logger.info("Skipping removal of non-existent package %s" % (name))
def servicechecktest(self, env):
  from params import params
  env.set_params(params)
  from service_check import ServiceCheck
  service_check = ServiceCheck()
  Logger.info('Service Check Test')
  service_check.service_check(env)
def action_create(self):
  path = self.resource.path

  if os.path.isdir(path):
    raise Fail("Applying %s failed, directory with name %s exists" % (self.resource, path))

  dirname = os.path.dirname(path)
  if not os.path.isdir(dirname):
    raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))

  write = False
  content = self._get_content()
  if not os.path.exists(path):
    write = True
    reason = "it doesn't exist"
  elif self.resource.replace:
    if content is not None:
      with open(path, "rb") as fp:
        old_content = fp.read()
      if content != old_content:
        write = True
        reason = "contents don't match"
        if self.resource.backup:
          self.resource.env.backup_file(path)

  if write:
    Logger.info("Writing %s because %s" % (self.resource, reason))
    with open(path, "wb") as fp:
      if content:
        fp.write(content)

  if self.resource.owner and self.resource.mode:
    _set_file_acl(self.resource.path, self.resource.owner, self.resource.mode)
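# Standalone sketch of the write decision used by action_create() above:
# write when the target file is missing, or when replacement is allowed and
# the on-disk contents differ from the desired contents.
import os

def should_write(path, new_content, replace=True):
  if not os.path.exists(path):
    return True, "it doesn't exist"
  if replace and new_content is not None:
    with open(path, "rb") as fp:
      if fp.read() != new_content:
        return True, "contents don't match"
  return False, None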
def check_service_status(self, service, keyword):
  cmd = "service {0} status | grep -E '{1}'".format(service, keyword)
  Logger.info("run service check on {0} : ".format(service))
  (status, output) = commands.getstatusoutput(cmd)
  if output == "":
    Logger.error("service {0} not running".format(service))
    raise ComponentIsNotRunning()
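# The check above boils down to a grep over the service status output; a
# rendered example with sample values:
service, keyword = "mysql", "running"  # sample values
cmd = "service {0} status | grep -E '{1}'".format(service, keyword)
print(cmd)  # service mysql status | grep -E 'running'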
def install_package(self, name):
  if not self._check_existence(name):
    cmd = INSTALL_CMD % (name)
    Logger.info("Installing package %s ('%s')" % (name, cmd))
    shell.checked_call(cmd)
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
def setup_solr_cloud():
  import params

  code, output = call(
      format('{zk_client_prefix} -cmd get {solr_cloud_zk_directory}{clusterstate_json}'),
      env={'JAVA_HOME': params.java64_home},
      timeout=60
  )

  if "NoNodeException" not in output:
    Logger.info(
        format("ZK node {solr_cloud_zk_directory}{clusterstate_json} already exists, skipping ..."))
    return

  Execute(
      format('{zk_client_prefix} -cmd makepath {solr_cloud_zk_directory}'),
      environment={'JAVA_HOME': params.java64_home},
      ignore_failures=True,
      user=params.solr_config_user
  )
def check_process(self, keyword):
  Logger.info("check process with: {0}".format(keyword))
  cmd = "ps aux | grep -E '" + keyword + "' | grep -v grep | cat"
  result = self.exe(cmd)
  if result == "":
    Logger.error("process {0} does not exist".format(keyword))
    raise ComponentIsNotRunning()
def check_process(keyword):
  Logger.info("check process by: {0}".format(keyword))
  cmd = "ps aux | grep -E '" + keyword + "' | grep -v grep | cat"
  result = Toolkit.exe(cmd)
  if result == "":
    Logger.error("process matched by {0} does not exist".format(keyword))
    raise ComponentIsNotRunning()
def _get_directory_mappings_during_upgrade():
  """
  Gets a dictionary of directory to archive name that represents the directories
  that need to be backed up and their output tarball archive targets
  :return: the dictionary of directory to tarball mappings
  """
  import params

  # Must be performing an Upgrade
  if params.upgrade_direction is None or params.upgrade_direction != Direction.UPGRADE or \
      params.upgrade_from_version is None or params.upgrade_from_version == "":
    Logger.error("Function _get_directory_mappings_during_upgrade() can only be called during a Stack Upgrade in direction UPGRADE.")
    return {}

  # By default, use this for all stacks.
  knox_data_dir = '/var/lib/knox/data'

  if params.stack_name and params.stack_name.upper() == "HDP" and \
      compare_versions(format_hdp_stack_version(params.upgrade_from_version), "2.3.0.0") > 0:
    # Use the version that is being upgraded from.
    knox_data_dir = format('/usr/hdp/{upgrade_from_version}/knox/data')

  # the trailing "/" is important here so as to not include the "conf" folder itself
  directories = {knox_data_dir: BACKUP_DATA_ARCHIVE, params.knox_conf_dir + "/": BACKUP_CONF_ARCHIVE}

  Logger.info(format("Knox directories to backup:\n{directories}"))
  return directories
def stop(self, env):
  Logger.info("stop sql node")
  import params
  env.set_params(params)
  Execute(params.stop_sql_node_cmd,
          user='******'
          )
def configure(self, env, upgrade_type=None, config_dir=None):
  from params import params
  env.set_params(params)
  Logger.info("Running profiler configure")
  File(format("{metron_config_path}/profiler.properties"),
       content=Template("profiler.properties.j2"),
       owner=params.metron_user,
       group=params.metron_group
       )

  if not metron_service.is_zk_configured(params):
    metron_service.init_zk_config(params)
    metron_service.set_zk_configured(params)
  metron_service.refresh_configs(params)

  commands = ProfilerCommands(params)
  if not commands.is_hbase_configured():
    commands.create_hbase_tables()
  if params.security_enabled and not commands.is_hbase_acl_configured():
    commands.set_hbase_acls()
  if params.security_enabled and not commands.is_acl_configured():
    commands.init_kafka_acls()
    commands.set_acl_configured()

  Logger.info("Calling security setup")
  storm_security_setup(params)
  if not commands.is_configured():
    commands.set_configured()
def create_hbase_tables(self):
  Logger.info("Creating HBase Tables")
  metron_service.create_hbase_table(self.__params,
                                    self.__params.user_settings_hbase_table,
                                    self.__params.user_settings_hbase_cf)
  Logger.info("Done creating HBase Tables")
  self.set_hbase_configured()
def start(self, env):
  Logger.info("start sql node")
  import params
  env.set_params(params)
  Execute("mysqld_safe >/dev/null 2>&1 &",
          user='******'
          )
def pre_rolling_restart(self, env):
  Logger.info("Executing Rolling Upgrade pre-restart")
  import params
  env.set_params(params)

  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    Execute(format("hdp-select set zookeeper-server {version}"))
def start_rest_application(self):
  """
  Start the REST application
  """
  Logger.info('Starting REST application')

  if self.__params.security_enabled:
    kinit(self.__params.kinit_path_local,
          self.__params.metron_keytab_path,
          self.__params.metron_principal_name,
          execute_user=self.__params.metron_user)

  # Get the PID associated with the service
  pid_file = format("{metron_rest_pid_dir}/{metron_rest_pid}")
  pid = get_user_call_output.get_user_call_output(format("cat {pid_file}"),
                                                  user=self.__params.metron_user,
                                                  is_checked_call=False)[1]
  process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

  # Set the password with env variable instead of param to avoid it showing in ps
  cmd = format((
    "export METRON_JDBC_PASSWORD={metron_jdbc_password!p};"
    "export JAVA_HOME={java_home};"
    "export METRON_REST_CLASSPATH={metron_rest_classpath};"
    "export METRON_INDEX_CP={metron_indexing_classpath};"
    "export METRON_LOG_DIR={metron_log_dir};"
    "export METRON_PID_FILE={pid_file};"
    "{metron_home}/bin/metron-rest.sh;"
    "unset METRON_JDBC_PASSWORD;"
  ))

  Execute(cmd,
          user=self.__params.metron_user,
          logoutput=True,
          not_if=process_id_exists_command,
          timeout=60)
  Logger.info('Done starting REST application')
def install(self, env):
  import params
  env.set_params(params)
  commands = Commands(params)
  commands.setup_repo()
  Logger.info('Install RPM packages')
  self.install_packages(env)
def init_kafka_topics(self):
  Logger.info('Creating Kafka topics')
  command_template = """{0}/kafka-topics.sh \
    --zookeeper {1} \
    --create \
    --topic {2} \
    --partitions {3} \
    --replication-factor {4} \
    --config retention.bytes={5}"""

  num_partitions = 1
  replication_factor = 1
  retention_gigabytes = int(self.__params.metron_topic_retention)
  retention_bytes = retention_gigabytes * 1024 * 1024 * 1024

  Logger.info("Creating main topics for parsers")
  for parser_name in self.get_parser_list():
    Logger.info("Creating topic '{0}'".format(parser_name))
    Execute(command_template.format(self.__params.kafka_bin_dir,
                                    self.__params.zookeeper_quorum,
                                    parser_name,
                                    num_partitions,
                                    replication_factor,
                                    retention_bytes))

  Logger.info("Creating topics for error handling")
  Execute(command_template.format(self.__params.kafka_bin_dir,
                                  self.__params.zookeeper_quorum,
                                  "parser_invalid",
                                  num_partitions,
                                  replication_factor,
                                  retention_bytes))
  Execute(command_template.format(self.__params.kafka_bin_dir,
                                  self.__params.zookeeper_quorum,
                                  "parser_error",
                                  num_partitions,
                                  replication_factor,
                                  retention_bytes))
  Logger.info("Done creating Kafka topics")
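# Worked example of the retention arithmetic above: a metron_topic_retention
# of 10 (gigabytes, a sample value) becomes the retention.bytes value passed
# to kafka-topics.sh.
retention_gigabytes = 10  # sample value
retention_bytes = retention_gigabytes * 1024 * 1024 * 1024
print(retention_bytes)  # 10737418240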
def _check_nodemanager_startup():
  '''
  Checks that a NodeManager is in a RUNNING state in the cluster via the
  "yarn node -list -states=RUNNING" command. Once the NodeManager is found to be
  alive this method will return, otherwise it will raise a Fail(...) and retry
  automatically.
  :return:
  '''
  import params

  command = 'yarn node -list -states=RUNNING'
  try:
    # 'su - yarn -c "yarn node -status c6401.ambari.apache.org:45454"'
    return_code, yarn_output = shell.call(command, user=params.hdfs_user)
  except:
    raise Fail('Unable to determine if the NodeManager has started after upgrade.')

  if return_code == 0:
    hostname = params.hostname.lower()
    nodemanager_address = params.nm_address.lower()
    yarn_output = yarn_output.lower()

    if hostname in yarn_output or nodemanager_address in yarn_output:
      Logger.info('NodeManager with ID {0} has rejoined the cluster.'.format(nodemanager_address))
      return
    else:
      raise Fail('NodeManager with ID {0} was not found in the list of running NodeManagers'.format(nodemanager_address))

  raise Fail('Unable to determine if the NodeManager has started after upgrade (result code {0})'.format(str(return_code)))
def pre_upgrade_deregister():
  """
  Runs the "hive --service hiveserver2 --deregister <version>" command to
  de-provision the server in preparation for an upgrade. This will contact
  ZooKeeper to remove the server so that clients that attempt to connect will be
  directed to other servers automatically. Once all clients have drained, the
  server will shutdown automatically; this process could take a very long time.
  This function will obtain the Kerberos ticket if security is enabled.
  :return:
  """
  import params

  Logger.info('HiveServer2 executing "deregister" command in preparation for upgrade...')

  if params.security_enabled:
    kinit_command = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    Execute(kinit_command, user=params.smokeuser)

  # calculate the current hive server version
  current_hiveserver_version = _get_current_hiveserver_version()
  if current_hiveserver_version is None:
    raise Fail('Unable to determine the current HiveServer2 version to deregister.')

  # deregister
  command = 'hive --service hiveserver2 --deregister ' + current_hiveserver_version
  Execute(command,
          user=params.hive_user,
          path=params.execute_path,
          tries=1
          )
def stop(self, env, upgrade_type=None):
  import params
  env.set_params(params)
  Logger.info("Stop Kibana Master")
  Execute("service kibana stop")
def check_indexer_parameters():
  """
  Ensure that all required parameters have been defined for the chosen
  Indexer; either Solr or Elasticsearch.
  """
  missing = []
  config = Script.get_config()
  indexer = config['configurations']['metron-indexing-env']['ra_indexing_writer']
  Logger.info('Checking parameters for indexer = ' + indexer)

  if indexer == 'Solr':
    # check for all required solr parameters
    if not config['configurations']['metron-env']['solr_zookeeper_url']:
      missing.append("metron-env/solr_zookeeper_url")
  else:
    # check for all required elasticsearch parameters
    if not config['configurations']['metron-env']['es_cluster_name']:
      missing.append("metron-env/es_cluster_name")
    if not config['configurations']['metron-env']['es_hosts']:
      missing.append("metron-env/es_hosts")
    if not config['configurations']['metron-env']['es_date_format']:
      missing.append("metron-env/es_date_format")

  if len(missing) > 0:
    raise Fail("Missing required indexing parameter(s): indexer={0}, missing={1}".format(indexer, missing))
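# Standalone sketch of the parameter validation above, substituting a plain
# dict for Script.get_config(); the keys mirror the original, the values are
# samples only.
config = {
  'configurations': {
    'metron-indexing-env': {'ra_indexing_writer': 'Solr'},
    'metron-env': {'solr_zookeeper_url': ''}
  }
}
missing = []
if config['configurations']['metron-indexing-env']['ra_indexing_writer'] == 'Solr':
  if not config['configurations']['metron-env']['solr_zookeeper_url']:
    missing.append('metron-env/solr_zookeeper_url')
print(missing)  # ['metron-env/solr_zookeeper_url']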
def check_kafka_topics(params, topics):
  """
  Validates that the Kafka topics exist. An exception is raised if any of the
  topics do not exist.
  :param params:
  :param topics: A list of topic names.
  """
  # if needed kinit as 'metron'
  if params.security_enabled:
    kinit(params.kinit_path_local,
          params.metron_keytab_path,
          params.metron_principal_name,
          execute_user=params.metron_user)

  template = """{0}/kafka-topics.sh \
    --zookeeper {1} \
    --list | \
    awk 'BEGIN {{cnt=0;}} /{2}/ {{cnt++}} END {{if (cnt > 0) {{exit 0}} else {{exit 1}}}}'"""

  for topic in topics:
    Logger.info("Checking existence of Kafka topic '{0}'".format(topic))
    cmd = template.format(params.kafka_bin_dir, params.zookeeper_quorum, topic)
    err_msg = "Missing Kafka topic; topic={0}".format(topic)
    execute(cmd, user=params.kafka_user, err_msg=err_msg)
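# Rendered form of the topic-existence check for one topic, with sample paths;
# the awk program exits 0 only when the topic name appears in the --list output.
template = """{0}/kafka-topics.sh \
  --zookeeper {1} \
  --list | \
  awk 'BEGIN {{cnt=0;}} /{2}/ {{cnt++}} END {{if (cnt > 0) {{exit 0}} else {{exit 1}}}}'"""
print(template.format("/usr/hdp/current/kafka-broker/bin",  # sample bin dir
                      "zk1:2181,zk2:2181",                  # sample quorum
                      "enrichments"))                       # sample topic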
def check_hbase_acls(params, table, user=None, permissions="READ,WRITE"):
  """
  Validates that HBase table permissions exist for a user. An exception is
  raised if the permissions do not exist.
  :param params:
  :param table: The name of the HBase table.
  :param user: The name of the user.
  :param permissions: The permissions that should exist.
  """
  if user is None:
    user = params.metron_user
  Logger.info("Checking HBase ACLs; table={0}, user={1}, permissions={2}".format(table, user, permissions))

  # if needed kinit as 'hbase'
  if params.security_enabled:
    kinit(params.kinit_path_local,
          params.hbase_keytab_path,
          params.hbase_principal_name,
          execute_user=params.hbase_user)

  template = """echo "user_permission '{0}'" | \
    hbase shell -n | \
    grep " {1} " | \
    grep "actions={2}" """
  cmd = template.format(table, user, permissions)
  err_msg = "Missing HBase access; table={0}, user={1}, permissions={2}".format(table, user, permissions)
  execute(cmd, user=params.hbase_user, err_msg=err_msg)
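# Hypothetical call, assuming a params object like the one used above; the
# table and user names are samples. Verifies the metron user can read and
# write the enrichment table.
check_hbase_acls(params, "enrichment", user="metron", permissions="READ,WRITE")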
def pull_config(params):
  Logger.info('Pulling all Metron configs down from ZooKeeper to local file system')
  Logger.info('NOTE - THIS IS OVERWRITING THE LOCAL METRON CONFIG DIR WITH ZOOKEEPER CONTENTS: ' + params.metron_zookeeper_config_path)
  Execute(ambari_format(
      "{metron_home}/bin/zk_load_configs.sh --zk_quorum {zookeeper_quorum} --mode PULL --output_dir {metron_zookeeper_config_path} --force"),
      path=ambari_format("{java_home}/bin")
  )
def get_running_topologies(params):
  Logger.info('Getting Running Storm Topologies from Storm REST Server')
  Logger.info('Security enabled? ' + str(params.security_enabled))

  # Want to sudo to the metron user and kinit as them so we aren't polluting root
  # with Metron's Kerberos tickets. This is because we need to run a command with
  # a return as the metron user. Sigh
  negotiate = '--negotiate -u : ' if params.security_enabled else ''
  cmd = ambari_format('curl --max-time 3 ' + negotiate + '{storm_rest_addr}/api/v1/topology/summary')

  if params.security_enabled:
    kinit(params.kinit_path_local,
          params.metron_keytab_path,
          params.metron_principal_name,
          execute_user=params.metron_user)

  Logger.info('Running cmd: ' + cmd)
  return_code, stdout, stderr = get_user_call_output(cmd,
                                                     user=params.metron_user,
                                                     is_checked_call=False)
  if return_code != 0:
    return {}

  try:
    stormjson = json.loads(stdout)
  except ValueError as e:
    Logger.info('Stdout: ' + str(stdout))
    Logger.info('Stderr: ' + str(stderr))
    Logger.exception(str(e))
    return {}

  # The snippet was truncated before the success-path return; this
  # reconstruction maps each topology name to its status, based on the
  # "topologies" list (with "name" and "status" fields) that the Storm
  # /api/v1/topology/summary endpoint returns.
  topologies = {}
  for topology in stormjson['topologies']:
    topologies[topology['name']] = topology['status']
  return topologies
def solr_schema_install(self, env):
  from params import params
  env.set_params(params)
  Logger.info("Installing Solr schemas")

  if self.__params.security_enabled:
    metron_security.kinit(self.__params.kinit_path_local,
                          self.__params.solr_keytab_path,
                          self.__params.solr_principal_name,
                          self.__params.solr_user)

  try:
    commands = IndexingCommands(params)
    for collection_name in commands.get_solr_schemas():
      # install the schema
      cmd = format((
        "export ZOOKEEPER={solr_zookeeper_url};"
        "export SECURITY_ENABLED={security_enabled};"
      ))
      cmd += "{0}/bin/create_collection.sh {1};"

      Execute(cmd.format(params.metron_home, collection_name),
              user=self.__params.solr_user)
    return True

  except Exception as e:
    msg = "WARNING: Solr schemas could not be installed. " \
          "Is Solr running? Will reattempt install on next start. error={0}"
    Logger.warning(msg.format(e))
    return False
def get_mount_point_for_dir(dir, mount_points=None):
  """
  :param dir: Directory to check, even if it doesn't exist.
  :return: Returns the closest mount point as a string for the directory.
  If the "dir" variable is None, will return None.
  If the directory does not exist, will return "/".
  """
  best_mount_found = None
  if dir:
    dir = dir.strip()

    cached_mounts = [m['mount_point'] for m in get_and_cache_mount_points()] if mount_points is None else mount_points

    # If the path is "/hadoop/hdfs/data", then possible matches for mounts could be
    # "/", "/hadoop/hdfs", and "/hadoop/hdfs/data".
    # So take the one with the greatest number of segments.
    for m in cached_mounts:
      # Ensure that the mount path and the dir path ends with "/"
      # The mount point "/hadoop" should not match the path "/hadoop1"
      if os.path.join(dir, "").startswith(os.path.join(m, "")):
        if best_mount_found is None:
          best_mount_found = m
        elif os.path.join(best_mount_found, "").count(os.path.sep) < os.path.join(m, "").count(os.path.sep):
          best_mount_found = m

  Logger.info("Mount point for directory %s is %s" % (str(dir), str(best_mount_found)))
  return best_mount_found
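# Self-contained illustration of the longest-prefix rule implemented above;
# note that "/hadoop1" is rejected because matching is done on whole path
# segments (both sides get a trailing "/").
import os

def closest_mount(path, mounts):
  best = None
  for m in mounts:
    if os.path.join(path, "").startswith(os.path.join(m, "")):
      if best is None or os.path.join(best, "").count(os.path.sep) < os.path.join(m, "").count(os.path.sep):
        best = m
  return best

print(closest_mount("/hadoop/hdfs/data", ["/", "/hadoop", "/hadoop1"]))  # /hadoop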
def status(self, env):
  import params
  env.set_params(params)
  Logger.info("Status of the Master")
  Execute("service kibana status")
def compute_actual_version(self):
  """
  After packages are installed, determine what the new actual version is.
  """

  # If the repo contains a build number, optimistically assume it to be the
  # actual_version. It will get changed to the correct value if it is not.
  self.actual_version = None
  self.repo_version_with_build_number = None
  if self.repository_version:
    m = re.search("[\d\.]+-\d+", self.repository_version)
    if m:
      # Contains a build number
      self.repo_version_with_build_number = self.repository_version
      # This is the best value known so far.
      self.structured_output['actual_version'] = self.repo_version_with_build_number
      self.put_structured_out(self.structured_output)

  Logger.info("Attempting to determine actual version with build number.")
  Logger.info("Old versions: {0}".format(self.old_versions))

  new_versions = get_stack_versions(self.stack_root_folder)
  Logger.info("New versions: {0}".format(new_versions))

  deltas = set(new_versions) - set(self.old_versions)
  Logger.info("Deltas: {0}".format(deltas))

  # Get version without build number
  normalized_repo_version = self.repository_version.split('-')[0]

  if 1 == len(deltas):
    self.actual_version = next(iter(deltas)).strip()
    self.structured_output['actual_version'] = self.actual_version
    self.put_structured_out(self.structured_output)
    write_actual_version_to_history_file(normalized_repo_version, self.actual_version)
    Logger.info("Found actual version {0} by checking the delta between versions before and after installing packages".format(self.actual_version))
  else:
    # If the first install attempt does a partial install and is unable to report this to the server,
    # then a subsequent attempt will report an empty delta. For this reason, we search for a best fit version for the repo version
    Logger.info("Cannot determine actual version installed by checking the delta between versions before and after installing packages")
    Logger.info("Will try to find the actual version by searching for the best possible match in the list of versions installed")
    self.actual_version = self.find_best_fit_version(new_versions, self.repository_version)
    if self.actual_version is not None:
      self.actual_version = self.actual_version.strip()
      self.structured_output['actual_version'] = self.actual_version
      self.put_structured_out(self.structured_output)
      Logger.info("Found actual version {0} by searching for best possible match".format(self.actual_version))
    else:
      msg = "Could not determine actual version installed. Try reinstalling packages again."
      raise Fail(msg)
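# The build-number heuristic above hinges on this regex; a quick demonstration
# with sample version strings.
import re

print(bool(re.search("[\d\.]+-\d+", "2.6.5.0-292")))  # True  (has a build number)
print(bool(re.search("[\d\.]+-\d+", "2.6.5.0")))      # False (no build number)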
def status(self, env):
  Logger.info('Checking Eagle UserProfile Scheduler')
  import params
  env.set_params(params)
  self.configure(env)
  eagle_userprofile_scheduler_exec(action='status')
def pre_rolling_restart(self, env):
  Logger.info("Executing rolling pre-restart Eagle UserProfile Scheduler")
  import params
  env.set_params(params)
def configure(self, env):
  Logger.info("Configure Eagle UserProfile Scheduler")
  import params
  env.set_params(params)
def install(self, env):
  Logger.info('Install Eagle UserProfile Scheduler')
  # self.install_packages(env)
  import params
  env.set_params(params)
  self.configure(env)
# Oozie tmp dir should be /var/tmp/oozie and is already created by a function above.
command = format("cd {oozie_tmp_dir} && {oozie_setup_sh} prepare-war {oozie_secure} ")
command_to_file = format("cd {oozie_tmp_dir} && {oozie_setup_sh_current} prepare-war {oozie_secure} ").strip()

run_prepare_war = False
if os.path.exists(prepare_war_cmd_file):
  cmd = ""
  with open(prepare_war_cmd_file, "r") as f:
    cmd = f.readline().strip()

  if command_to_file != cmd:
    run_prepare_war = True
    Logger.info(format("Will run prepare war cmd since marker file {prepare_war_cmd_file} has contents which differ.\n" \
                       "Expected: {command_to_file}.\nActual: {cmd}."))
else:
  run_prepare_war = True
  Logger.info(format("Will run prepare war cmd since marker file {prepare_war_cmd_file} is missing."))

return_code, libext_content, error_output = get_user_call_output(list_libext_command, user=params.oozie_user)
libext_content = libext_content.strip()

if not run_prepare_war:
  if os.path.exists(libext_content_file):
    old_content = ""
    with open(libext_content_file, "r") as f:
      # The snippet is truncated here; reading the marker file's previous
      # contents for comparison is the assumed next step.
      old_content = f.read().strip()
def actionexecute(self, env):
  num_errors = 0

  # Parse parameters
  config = Script.get_config()

  try:
    command_repository = CommandRepository(config['repositoryFile'])
  except KeyError:
    raise Fail("The command repository indicated by 'repositoryFile' was not found")

  repo_rhel_suse = config['configurations']['cluster-env']['repo_suse_rhel_template']
  repo_ubuntu = config['configurations']['cluster-env']['repo_ubuntu_template']
  template = repo_rhel_suse if OSCheck.is_redhat_family() or OSCheck.is_suse_family() else repo_ubuntu

  # Handle a SIGTERM and SIGINT gracefully
  signal.signal(signal.SIGTERM, self.abort_handler)
  signal.signal(signal.SIGINT, self.abort_handler)

  self.repository_version = command_repository.version_string

  # Select dict that contains parameters
  try:
    package_list = json.loads(config['roleParams']['package_list'])
    stack_id = config['roleParams']['stack_id']
  except KeyError:
    pass

  self.stack_name = Script.get_stack_name()
  if self.stack_name is None:
    raise Fail("Cannot determine the stack name")

  self.stack_root_folder = Script.get_stack_root()
  if self.stack_root_folder is None:
    raise Fail("Cannot determine the stack's root directory")

  if self.repository_version is None:
    raise Fail("Cannot determine the repository version to install")

  self.repository_version = self.repository_version.strip()

  try:
    if not command_repository.items:
      Logger.warning("Repository list is empty. Ambari may not be managing the repositories for {0}.".format(self.repository_version))
    else:
      Logger.info("Will install packages for repository version {0}".format(self.repository_version))
      new_repo_files = create_repo_files(template, command_repository)
      self.repo_files.update(new_repo_files)
  except Exception as err:
    Logger.logger.exception("Cannot install repository files. Error: {0}".format(str(err)))
    num_errors += 1
def service_check(self, env):
  import params
  env.set_params(params)

  if params.streamline_ssl_enabled:
    streamline_api = format("https://{params.hostname}:{params.streamline_ssl_port}/api/v1/catalog/streams/componentbundles")
  else:
    streamline_api = format("http://{params.hostname}:{params.streamline_port}/api/v1/catalog/streams/componentbundles")
  Logger.info(streamline_api)
  max_retries = 3
  success = False

  if params.security_enabled and not params.streamline_sso_enabled:
    kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
    return_code, out = shell.checked_call(kinit_cmd,
                                          path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                                          user=params.smokeuser)

  for num in range(0, max_retries):
    try:
      Logger.info(format("Making http requests to {streamline_api}"))

      if params.security_enabled:
        get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + streamline_api
        return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
                                                      user=params.smokeuser,
                                                      path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin')
        try:
          json_response = json.loads(stdout)
          success = True
          Logger.info(format("Successfully made an API request to SAM. {stdout}"))
          break
        except Exception as e:
          Logger.error(format("Response from SAM API was not a valid JSON. Response: {stdout}"))
      else:
        response = urllib2.urlopen(streamline_api)
        api_response = response.read()
        response_code = response.getcode()
        Logger.info(format("SAM response http status {response_code}"))

        if response.getcode() != 200:
          Logger.error(format("Failed to fetch response for {streamline_api}"))
          show_logs(params.streamline_log_dir, params.streamline_user)
        else:
          success = True
          Logger.info(format("Successfully made an API request to SAM. {api_response}"))
          break
    except urllib2.URLError as e:
      Logger.error(format("Failed to make API request to SAM server at {streamline_api}, retrying... {num} out of {max_retries}"))
      time.sleep(num * 10)  # back off before the next attempt
      continue

  if not success:
    Logger.error(format("Failed to make API request to SAM server at {streamline_api} after {max_retries} retries"))
def clickhouse(upgrade_type=None):
  import params
  ensure_base_directories()

  # clickhouse server all configuration, return result type dict
  clickhouse_config_template = mutable_config_dict(params.clickhouse_config_json_template)
  clickhouse_metrika_template = mutable_config_dict(params.clickhouse_metrika_json_template)

  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  Logger.info(format("Effective stack version: {effective_version}"))

  # point interserver_http_host at this host
  if effective_version is not None and effective_version != "":
    clickhouse_server_host = clickhouse_config_template['interserver_http_host'] = params.hostname
    Logger.info(format("clickhouse interserver_http_host: {clickhouse_server_host}"))
  else:
    listeners = clickhouse_config_template['interserver_http_host'].replace("localhost", params.hostname)
    Logger.info(format("clickhouse interserver_http_host: {listeners}"))

  # format convert
  import clickhouse_utils

  clickhouse_config = clickhouse_utils.clickhouseConfigToXML(clickhouse_config_template)
  clickhouse_metrika = clickhouse_utils.clickhouseMetrikaToXML(params.tcp_port,
                                                               params.user_admin,
                                                               params.user_admin_password,
                                                               params.clickhouse_hosts,
                                                               params.zookeeper_hosts,
                                                               params.remote_servers,
                                                               params.hostname,
                                                               params.zookeeper_server,
                                                               clickhouse_metrika_template)

  Directory(params.clickhouse_log_dir,
            mode=0755,
            cd_access='a',
            owner=params.clickhouse_user,
            group=params.clickhouse_group,
            create_parents=True,
            recursive_ownership=True,
            )

  Directory(params.conf_dir,
            mode=0755,
            cd_access='a',
            owner=params.clickhouse_user,
            group=params.clickhouse_group,
            create_parents=True,
            recursive_ownership=True,
            )

  File(format("{conf_dir}/config.xml"),
       owner=params.clickhouse_user,
       group=params.clickhouse_group,
       content=InlineTemplate(clickhouse_config)
       )

  File(format("{conf_dir}/metrika.xml"),
       owner=params.clickhouse_user,
       group=params.clickhouse_group,
       content=InlineTemplate(clickhouse_metrika)
       )

  File(format("{conf_dir}/users.xml"),
       owner=params.clickhouse_user,
       group=params.clickhouse_group,
       content=Template("clickhouse-users.xml.j2")
       )

  # On some OS this folder may not exist, so we will create it before pushing files there
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root'
            )

  File(os.path.join(params.limits_conf_dir, 'clickhouse.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("clickhouse.conf.j2")
       )

  File(os.path.join(params.bindir, 'clickhouse-manager.sh'),
       owner='root',
       group='root',
       mode=0755,
       content=Template("clickhouse-manager.sh.j2")
       )

  File(os.path.join(params.crondir, 'clickhouse-server'),
       owner='root',
       group='root',
       mode=0755,
       content=Template("clickhouse-server-cron.j2")
       )
def upgrade_schema(self, env):
  """
  Executes the schema upgrade binary. This is its own function because it could
  be called as a standalone task from the upgrade pack, but is safe to run it
  for each metastore instance. The schema upgrade on an already upgraded
  metastore is a NOOP.

  The metastore schema upgrade requires a database driver library for most
  databases. During an upgrade, it's possible that the library is not present,
  so this will also attempt to copy/download the appropriate driver.

  This function will also ensure that configurations are written out to disk
  before running since the new configs will most likely not yet exist on an
  upgrade. Should not be invoked for a DOWNGRADE; Metastore only supports schema upgrades.
  """
  Logger.info("Upgrading Hive Metastore Schema")
  import status_params
  import params
  env.set_params(params)

  # ensure that configurations are written out before trying to upgrade the schema
  # since the schematool needs configs and doesn't know how to use the hive conf override
  self.configure(env)

  if params.security_enabled:
    cached_kinit_executor(status_params.kinit_path_local,
                          status_params.hive_user,
                          params.hive_metastore_keytab_path,
                          params.hive_metastore_principal,
                          status_params.hostname,
                          status_params.tmp_dir)

  # ensure that the JDBC driver is present for the schema tool; if it's not
  # present, then download it first
  if params.hive_jdbc_driver in params.hive_jdbc_drivers_list:
    target_directory = format("{stack_root}/{version}/hive/lib")

    # download it if it does not exist
    if not os.path.exists(params.source_jdbc_file):
      jdbc_connector(params.hive_jdbc_target, params.hive_previous_jdbc_jar)

    target_directory_and_filename = os.path.join(target_directory, os.path.basename(params.source_jdbc_file))

    if params.sqla_db_used:
      target_native_libs_directory = format("{target_directory}/native/lib64")

      Execute(format("yes | {sudo} cp {jars_in_hive_lib} {target_directory}"))
      Directory(target_native_libs_directory, create_parents=True)
      Execute(format("yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}"))
      Execute(format("{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"))
    else:
      # copy the JDBC driver from the older metastore location to the new location only
      # if it does not already exist
      if not os.path.exists(target_directory_and_filename):
        Execute(('cp', params.source_jdbc_file, target_directory),
                path=["/bin", "/usr/bin/"],
                sudo=True)

    File(target_directory_and_filename, mode=0644)

  # build the schema tool command
  binary = format("{hive_schematool_ver_bin}/schematool")

  # the conf.server directory changed locations between stack versions
  # since the configurations have not been written out yet during an upgrade
  # we need to choose the original legacy location
  schematool_hive_server_conf_dir = params.hive_server_conf_dir

  upgrade_from_version = upgrade_summary.get_source_version("HIVE", default_version=params.version_for_stack_feature_checks)

  if not check_stack_feature(StackFeature.CONFIG_VERSIONING, upgrade_from_version):
    schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

  env_dict = {'HIVE_CONF_DIR': schematool_hive_server_conf_dir}

  command = format("{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
  Execute(command,
          user=params.hive_user,
          tries=1,
          environment=env_dict,
          logoutput=True)
def service_check(self, env):
  import params
  env.set_params(params)
  Logger.info("Running Registry service check")

  registry_api = format("http://{params.hostname}:{params.registry_port}/api/v1/schemaregistry/schemaproviders")
  Logger.info(registry_api)
  max_retries = 3
  success = False

  if params.security_enabled:
    kinit_cmd = format("{kinit_path_local} -kt {params.smoke_user_keytab} {params.smokeuser_principal};")
    Execute(kinit_cmd, user=params.smokeuser)

  for num in range(0, max_retries):
    try:
      Logger.info(format("Making http requests to {registry_api}"))

      if params.security_enabled:
        get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + registry_api
        return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
                                                      user=params.smokeuser,
                                                      path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin')
        try:
          json_response = json.loads(stdout)
          success = True
          Logger.info(format("Successfully made an API request to registry. {stdout}"))
          break
        except Exception as e:
          Logger.error(format("Response from REGISTRY API was not a valid JSON. Response: {stdout}"))
      else:
        response = urllib2.urlopen(registry_api)
        api_response = response.read()
        response_code = response.getcode()
        Logger.info(format("registry response http status {response_code}"))

        if response.getcode() != 200:
          Logger.error(format("Failed to fetch response for {registry_api}"))
          show_logs(params.registry_log_dir, params.registry_user)
          raise
        else:
          success = True
          Logger.info(format("Successfully made an API request to registry. {api_response}"))
          break
    except urllib2.URLError as e:
      Logger.error(format("Failed to make API request to Registry server at {registry_api}, retrying... {num} out of {max_retries}"))
      time.sleep(num * 10)  # back off before the next attempt
      continue

  if not success:
    raise Fail(format("Failed to make API request to Registry server at {registry_api} after {max_retries} retries"))
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None,
             upgrade_suspended=False, env=None):

  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  if action in ["start", "stop"] and hdfs_binary is None:
    raise Fail('"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    # we need this directory to be present before any action (HA manual steps
    # for additional namenode)
    create_name_dirs(params.dfs_name_dir)

    # set up failover / secure zookeeper ACLs, this feature is supported from HDP 2.6 onwards
    set_up_zkfc_security(params)
  elif action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group)

    if do_format and not params.hdfs_namenode_format_disabled:
      format_namenode()

    if params.dfs_ha_enabled and \
        params.dfs_ha_namenode_standby is not None and \
        params.hostname == params.dfs_ha_namenode_standby:
      # if the current host is the standby NameNode in an HA deployment
      # run the bootstrap command, to start the NameNode in standby mode
      # this requires that the active NameNode is already up and running,
      # so this execute should be re-tried upon failure, up to a timeout
      success = bootstrap_standby_namenode(params)
      if not success:
        raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == constants.UPGRADE_TYPE_ROLLING and params.dfs_ha_enabled:
      # Most likely, ZKFC is up since RU will initiate the failover command. However, if that
      # failed, it would have tried to kill ZKFC manually, so we need to start it if not already running.
      safe_zkfc_op(action, env)

    options = ""
    if upgrade_type == constants.UPGRADE_TYPE_ROLLING:
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
    elif upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING:
      is_previous_image_dir = is_previous_fs_image()
      Logger.info("Previous file system image dir present is {0}".format(str(is_previous_image_dir)))

      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
    elif upgrade_type == constants.UPGRADE_TYPE_HOST_ORDERED:
      # nothing special to do for HOU - should be very close to a normal restart
      pass
    elif upgrade_type is None and upgrade_suspended is True:
      # the rollingUpgrade flag must be passed in during a suspended upgrade when starting NN
      if os.path.exists(namenode_upgrade.get_upgrade_in_progress_marker()):
        options = "-rollingUpgrade started"
      else:
        Logger.info("The NameNode upgrade marker file {0} does not exist, yet an upgrade is currently suspended. "
                    "Assuming that the upgrade of NameNode has not occurred yet.".format(namenode_upgrade.get_upgrade_in_progress_marker()))

    Logger.info("Options for start command are: {0}".format(options))

    service(action="start",
            name="namenode",
            user=params.hdfs_user,
            options=options,
            create_pid_dir=True,
            create_log_dir=True)

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user=params.hdfs_user)

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | No                       |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | ON -> OFF                | No                       |
    # EU with HA on standby | ON -> OFF                | No                       |
    # EU non-HA             | ON -> OFF                | No                       |

    # because we do things like create directories after starting NN,
    # the vast majority of the time this should be True - it should only
    # be False if this is HA and we are the Standby NN
    ensure_safemode_off = True

    # True if this is the only NameNode (non-HA) or if its the Active one in HA
    is_active_namenode = True

    if params.dfs_ha_enabled:
      Logger.info("Waiting for the NameNode to broadcast whether it is Active or Standby...")

      if is_this_namenode_active() is False:
        # we are the STANDBY NN
        is_active_namenode = False

        # we are the STANDBY NN and this restart is not part of an upgrade
        if upgrade_type is None:
          ensure_safemode_off = False

    # During an Express Upgrade, NameNode will not leave SafeMode until the DataNodes are started,
    # so always disable the Safemode check
    if upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING:
      ensure_safemode_off = False

    # some informative logging separate from the above logic to keep things a little cleaner
    if ensure_safemode_off:
      Logger.info("Waiting for this NameNode to leave Safemode due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}".format(
        params.dfs_ha_enabled, is_active_namenode, upgrade_type))
    else:
      Logger.info("Skipping Safemode check due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}".format(
        params.dfs_ha_enabled, is_active_namenode, upgrade_type))

    # wait for Safemode to end
    if ensure_safemode_off:
      if params.rolling_restart and params.rolling_restart_safemode_exit_timeout:
        calculated_retries = int(params.rolling_restart_safemode_exit_timeout) / 30
        wait_for_safemode_off(hdfs_binary, afterwait_sleep=30, retries=calculated_retries, sleep_seconds=30)
      else:
        wait_for_safemode_off(hdfs_binary)

    # Always run this on the "Active" NN unless Safemode has been ignored;
    # in the case where safemode was ignored (like during an express upgrade),
    # NN will be in SafeMode and cannot have directories created
    if is_active_namenode and ensure_safemode_off:
      create_hdfs_directories()
      create_ranger_audit_hdfs_directories()
    else:
      Logger.info("Skipping creation of HDFS directories since this is either not the Active NameNode or we did not wait for Safemode to finish.")
  elif action == "stop":
    import params
    service(action="stop", name="namenode", user=params.hdfs_user)
  elif action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
  elif action == "decommission":
    decommission()
def create_repository_urllib2(self, data, usernamepassword, policy_user):
  """
  :param data: repository dict
  :param usernamepassword: user credentials using which repository needs to be created
  :param policy_user: use this policy user for policies that will be used during repository creation
  :return: Returns created repository response else None
  """
  try:
    searchRepoURL = self.urlReposPub
    base64string = base64.encodestring('{0}'.format(usernamepassword)).replace('\n', '')
    headers = {
      'Accept': 'application/json',
      "Content-Type": "application/json"
    }
    request = urllib2.Request(searchRepoURL, data, headers)
    request.add_header("Authorization", "Basic {0}".format(base64string))
    result = openurl(request, timeout=20)
    response_code = result.getcode()
    response = json.loads(json.JSONEncoder().encode(result.read()))

    if response_code == 200:
      Logger.info('Repository created successfully')
      # Get Policies
      repoData = json.loads(data)
      repoName = repoData['name']
      typeOfPolicy = repoData['repositoryType']
      # Get Policies by repo name
      policyList = self.get_policy_by_repo_name(name=repoName,
                                                component=typeOfPolicy,
                                                status="true",
                                                usernamepassword=usernamepassword)
      if policyList is not None and len(policyList) > 0:
        policiesUpdateCount = 0
        for policy in policyList:
          updatedPolicyObj = self.get_policy_params(typeOfPolicy, policy, policy_user)
          policyResCode = self.update_ranger_policy(updatedPolicyObj['id'],
                                                    json.dumps(updatedPolicyObj),
                                                    usernamepassword)
          if policyResCode == 200:
            policiesUpdateCount = policiesUpdateCount + 1
          else:
            Logger.info('Policy update failed')
        # Check for count of updated policies
        if len(policyList) == policiesUpdateCount:
          Logger.info("Ranger repository created successfully and policies updated successfully, granting the ambari-qa user all permissions")
          return response
        else:
          return None
      else:
        Logger.info("Policies not found for the newly created repository")
        return None
    else:
      Logger.info('Repository creation failed')
      return None
  except urllib2.URLError as e:
    if isinstance(e, urllib2.HTTPError):
      raise Fail("Error creating repository. Http status code - {0}. \n {1}".format(e.code, e.read()))
    else:
      raise Fail("Error creating repository. Reason - {0}.".format(e.reason))
def service_check(self, env):
  import params

  Logger.info("Ambari Metrics service check was started.")
  env.set_params(params)

  random_value1 = random.random()
  current_time = int(time.time()) * 1000
  metric_json = Template('smoketest_metrics.json.j2',
                         hostname=params.hostname,
                         random1=random_value1,
                         current_time=current_time).get_content()
  Logger.info("Generated metrics:\n%s" % metric_json)

  headers = {"Content-type": "application/json"}

  for i in xrange(0, self.AMS_CONNECT_TRIES):
    try:
      Logger.info("Connecting (POST) to %s:%s%s" % (params.metric_collector_host,
                                                    params.metric_collector_port,
                                                    self.AMS_METRICS_POST_URL))
      conn = httplib.HTTPConnection(params.metric_collector_host,
                                    int(params.metric_collector_port))
      conn.request("POST", self.AMS_METRICS_POST_URL, metric_json, headers)
    except (httplib.HTTPException, socket.error) as ex:
      if i < self.AMS_CONNECT_TRIES - 1:  # range/xrange returns items from start to end-1
        time.sleep(self.AMS_CONNECT_TIMEOUT)
        Logger.info("Connection failed. Next retry in %s seconds." % (self.AMS_CONNECT_TIMEOUT))
        continue
      else:
        raise Fail("Metrics were not saved. Service check has failed. "
                   "\nConnection failed.")

    response = conn.getresponse()
    Logger.info("Http response: %s %s" % (response.status, response.reason))

    data = response.read()
    Logger.info("Http data: %s" % data)
    conn.close()

    if response.status == 200:
      Logger.info("Metrics were saved.")
      break
    else:
      Logger.info("Metrics were not saved. Service check has failed.")
      if i < self.AMS_CONNECT_TRIES - 1:  # range/xrange returns items from start to end-1
        time.sleep(self.AMS_CONNECT_TIMEOUT)
        Logger.info("Next retry in %s seconds." % (self.AMS_CONNECT_TIMEOUT))
      else:
        raise Fail("Metrics were not saved. Service check has failed. POST request status: %s %s \n%s" %
                   (response.status, response.reason, data))

  get_metrics_parameters = {
    "metricNames": "AMBARI_METRICS.SmokeTest.FakeMetric",
    "appId": "amssmoketestfake",
    "hostname": params.hostname,
    "startTime": current_time - 60000,
    "endTime": current_time + 61000,
    "precision": "seconds",
    "grouped": "false",
  }
  encoded_get_metrics_parameters = urllib.urlencode(get_metrics_parameters)

  Logger.info("Connecting (GET) to %s:%s%s" % (params.metric_collector_host,
                                               params.metric_collector_port,
                                               self.AMS_METRICS_GET_URL % encoded_get_metrics_parameters))

  conn = httplib.HTTPConnection(params.metric_collector_host,
                                int(params.metric_collector_port))
  conn.request("GET", self.AMS_METRICS_GET_URL % encoded_get_metrics_parameters)
  response = conn.getresponse()
  Logger.info("Http response: %s %s" % (response.status, response.reason))

  data = response.read()
  Logger.info("Http data: %s" % data)
  conn.close()

  if response.status == 200:
    Logger.info("Metrics were retrieved.")
  else:
    Logger.info("Metrics were not retrieved. Service check has failed.")
    raise Fail("Metrics were not retrieved. Service check has failed. GET request status: %s %s \n%s" %
               (response.status, response.reason, data))
  data_json = json.loads(data)

  def floats_eq(f1, f2, delta):
    return abs(f1 - f2) < delta

  for metrics_data in data_json["metrics"]:
    if (str(current_time) in metrics_data["metrics"] and
        str(current_time + 1000) in metrics_data["metrics"] and
        floats_eq(metrics_data["metrics"][str(current_time)], random_value1, 0.0000001) and
        floats_eq(metrics_data["metrics"][str(current_time + 1000)], current_time, 1)):
      Logger.info("Values %s and %s were found in the response." % (random_value1, current_time))
      break
  else:
    Logger.info("Values %s and %s were not found in the response." % (random_value1, current_time))
    raise Fail("Values %s and %s were not found in the response." % (random_value1, current_time))

  Logger.info("Ambari Metrics service check is finished.")
def copy_to_hdfs(name, user_group, owner, file_mode=0444, custom_source_file=None,
                 custom_dest_file=None, force_execute=False,
                 use_upgrading_version_during_upgrade=True, skip=False,
                 skip_component_check=False):
  # NOTE: the snippet was truncated before the signature; it is reconstructed
  # here from the docstring below, and the default values are assumptions.
  """
  :param name: Tarball name, e.g., tez, hive, pig, sqoop.
  :param user_group: Group to own the directory.
  :param owner: File owner
  :param file_mode: File permission
  :param custom_source_file: Override the source file path
  :param custom_dest_file: Override the destination file path
  :param force_execute: If true, will execute the HDFS commands immediately, otherwise, will defer to the calling function.
  :param use_upgrading_version_during_upgrade: If true, will use the version going to during upgrade. Otherwise, use the CURRENT (source) version.
  :param skip: If true, tarballs will not be copied as the cluster deployment uses prepped VMs.
  :param skip_component_check: If true, will skip checking if a given component is installed on the node
         for a file under its dir to be copied. This is in case the file is not mapped to a component
         but rather to a specific location (JDK jar, Ambari jar, etc).
  :return: Will return True if successful, otherwise, False.
  """
  import params

  Logger.info("Called copy_to_hdfs tarball: {0}".format(name))
  (success, source_file, dest_file) = get_tarball_paths(name, use_upgrading_version_during_upgrade,
                                                        custom_source_file, custom_dest_file)

  if not success:
    Logger.error("Could not copy tarball {0} due to a missing or incorrect parameter.".format(str(name)))
    return False

  if skip:
    Logger.warning("Skipping copying {0} to {1} for {2} as it is a sys prepped host.".format(
      str(source_file), str(dest_file), str(name)))
    return True
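# Hypothetical call, assuming the reconstructed signature above; "tez" is one
# of the tarball names listed in the docstring, and the group/owner values
# are samples.
copy_to_hdfs("tez", user_group="hadoop", owner="hdfs")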
def create_ambari_admin_user(self, ambari_admin_username, ambari_admin_password, usernamepassword):
  """
  :param ambari_admin_username: username of user to be created
  :param ambari_admin_password: password of user to be created
  :param usernamepassword: credentials used to authenticate against Ranger Admin
  :return: Returns response code for successful user creation else None
  """
  flag_ambari_admin_present = False
  match = re.match('[a-zA-Z0-9_\S]+$', ambari_admin_password)
  if match is None:
    raise Fail('Invalid password given for Ranger Admin user for Ambari')

  try:
    url = self.urlUsers + '?name=' + str(ambari_admin_username)
    request = urllib2.Request(url)
    base64string = base64.encodestring(usernamepassword).replace('\n', '')
    request.add_header("Content-Type", "application/json")
    request.add_header("Accept", "application/json")
    request.add_header("Authorization", "Basic {0}".format(base64string))
    result = openurl(request, timeout=20)
    response_code = result.getcode()
    response = json.loads(result.read())

    if response_code == 200 and len(response['vXUsers']) >= 0:
      for vxuser in response['vXUsers']:
        if vxuser['name'] == ambari_admin_username:
          flag_ambari_admin_present = True
          break
      else:
        flag_ambari_admin_present = False

      if flag_ambari_admin_present:
        Logger.info(ambari_admin_username + ' user already exists.')
        return response_code
      else:
        Logger.info(ambari_admin_username + ' user is not present, creating user using given configurations')
        url = self.urlSecUsers
        admin_user = dict()
        admin_user['status'] = 1
        admin_user['userRoleList'] = ['ROLE_SYS_ADMIN']
        admin_user['name'] = ambari_admin_username
        admin_user['password'] = ambari_admin_password
        admin_user['description'] = ambari_admin_username
        admin_user['firstName'] = ambari_admin_username
        data = json.dumps(admin_user)
        base64string = base64.encodestring('{0}'.format(usernamepassword)).replace('\n', '')
        headers = {
          'Accept': 'application/json',
          "Content-Type": "application/json"
        }
        request = urllib2.Request(url, data, headers)
        request.add_header("Authorization", "Basic {0}".format(base64string))
        result = openurl(request, timeout=20)
        response_code = result.getcode()
        response = json.loads(json.JSONEncoder().encode(result.read()))
        if response_code == 200 and response is not None:
          Logger.info('Ambari admin user creation successful.')
          return response_code
        else:
          Logger.info('Ambari admin user creation failed.')
          return None
    else:
      return None
  except urllib2.URLError as e:
    if isinstance(e, urllib2.HTTPError):
      raise Fail("Error creating ambari admin user. Http status code - {0}. \n {1}".format(e.code, e.read()))
    else:
      raise Fail("Error creating ambari admin user. Reason - {0}.".format(e.reason))
def init_pcap(self):
  self.init_kafka_topics()
  self.init_hdfs_dir()
  Logger.info("Done initializing PCAP configuration")
def stop_indexing_topology(self):
  Logger.info('Stopping ' + self.__indexing)
  stop_cmd = 'storm kill ' + self.__indexing
  Execute(stop_cmd)
  Logger.info('Done stopping indexing topologies')
def init_kafka_topics(self):
  Logger.info('Creating Kafka topic for PCAP')
  metron_service.init_kafka_topics(self.__params, self.__get_topics())
def initiate_safe_zkfc_failover():
  """
  If this is the active namenode, initiate a safe failover and wait for it to become the standby.
  If an error occurs, force a failover to happen by killing zkfc on this host. In this case,
  during the Restart, will also have to start ZKFC manually.
  """
  import params

  # Must kinit before running the HDFS command
  if params.security_enabled:
    Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
            user=params.hdfs_user)

  active_namenode_id = None
  standby_namenode_id = None
  active_namenodes, standby_namenodes, unknown_namenodes = get_namenode_states(params.hdfs_site,
                                                                               params.security_enabled,
                                                                               params.hdfs_user)
  if active_namenodes:
    active_namenode_id = active_namenodes[0][0]
  if standby_namenodes:
    standby_namenode_id = standby_namenodes[0][0]

  if active_namenode_id:
    Logger.info(format("Active NameNode id: {active_namenode_id}"))
  if standby_namenode_id:
    Logger.info(format("Standby NameNode id: {standby_namenode_id}"))
  if unknown_namenodes:
    for unknown_namenode in unknown_namenodes:
      Logger.info("NameNode HA state for {0} is unknown".format(unknown_namenode[0]))

  if params.namenode_id == active_namenode_id and params.other_namenode_id == standby_namenode_id:
    # Failover if this NameNode is active and other NameNode is up and in standby (i.e. ready to become active on failover)
    Logger.info(format("NameNode {namenode_id} is active and NameNode {other_namenode_id} is in standby"))

    failover_command = format("hdfs haadmin -ns {dfs_ha_nameservices} -failover {namenode_id} {other_namenode_id}")
    check_standby_cmd = format("hdfs haadmin -ns {dfs_ha_nameservices} -getServiceState {namenode_id} | grep standby")

    msg = "Rolling Upgrade - Initiating a ZKFC failover on active NameNode host {0}.".format(params.hostname)
    Logger.info(msg)
    code, out = shell.call(failover_command, user=params.hdfs_user, logoutput=True)
    Logger.info(format("Rolling Upgrade - failover command returned {code}"))
    wait_for_standby = False

    if code == 0:
      wait_for_standby = True
    else:
      # Try to kill ZKFC manually
      was_zkfc_killed = kill_zkfc(params.hdfs_user)
      code, out = shell.call(check_standby_cmd, user=params.hdfs_user, logoutput=True)
      Logger.info(format("Rolling Upgrade - check for standby returned {code}"))
      if code == 255 and out:
        Logger.info("Rolling Upgrade - NameNode is already down.")
      else:
        if was_zkfc_killed:
          # Only mandate that this be the standby namenode if ZKFC was indeed killed to initiate a failover.
          wait_for_standby = True

    if wait_for_standby:
      Logger.info("Waiting for this NameNode to become the standby one.")
      Execute(check_standby_cmd,
              user=params.hdfs_user,
              tries=50,
              try_sleep=6,
              logoutput=True)
  else:
    msg = "Rolling Upgrade - Skipping ZKFC failover on NameNode host {0}.".format(params.hostname)
    Logger.info(msg)
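# Illustrative rendering of the failover command built above, with sample
# nameservice and NameNode IDs.
dfs_ha_nameservices = "mycluster"               # sample
namenode_id, other_namenode_id = "nn1", "nn2"   # samples
print("hdfs haadmin -ns {0} -failover {1} {2}".format(
    dfs_ha_nameservices, namenode_id, other_namenode_id))
# hdfs haadmin -ns mycluster -failover nn1 nn2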
def setup_ranger_storm(upgrade_type=None):
  """
  :param upgrade_type: Upgrade Type such as "rolling" or "nonrolling"
  """
  import params

  if params.enable_ranger_storm and params.security_enabled:

    stack_version = None
    if upgrade_type is not None:
      stack_version = params.version

    if params.retryAble:
      Logger.info("Storm: Setup ranger: command retry is enabled, thus retrying if ranger admin is down!")
    else:
      Logger.info("Storm: Setup ranger: command retry is not enabled, thus skipping if ranger admin is down!")

    if params.xml_configurations_supported and params.enable_ranger_storm and params.xa_audit_hdfs_is_enabled:
      if params.has_namenode:
        params.HdfsResource("/ranger/audit",
                            type="directory",
                            action="create_on_execute",
                            owner=params.hdfs_user,
                            group=params.hdfs_user,
                            mode=0755,
                            recursive_chmod=True)
        params.HdfsResource("/ranger/audit/storm",
                            type="directory",
                            action="create_on_execute",
                            owner=params.storm_user,
                            group=params.storm_user,
                            mode=0700,
                            recursive_chmod=True)
        params.HdfsResource(None, action="execute")

    if params.xml_configurations_supported:
      api_version = None
      if params.stack_supports_ranger_kerberos:
        api_version = 'v2'

      from resource_management.libraries.functions.setup_ranger_plugin_xml import setup_ranger_plugin
      setup_ranger_plugin('storm-nimbus', 'storm', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java64_home,
                          params.repo_name, params.storm_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_storm,
                          conf_dict=params.conf_dir,
                          component_user=params.storm_user,
                          component_group=params.user_group,
                          cache_service_list=['storm'],
                          plugin_audit_properties=params.config['configurations']['ranger-storm-audit'],
                          plugin_audit_attributes=params.config['configuration_attributes']['ranger-storm-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-storm-security'],
                          plugin_security_attributes=params.config['configuration_attributes']['ranger-storm-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-storm-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-storm-policymgr-ssl'],
                          component_list=['storm-client', 'storm-nimbus'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override=stack_version,
                          skip_if_rangeradmin_down=not params.retryAble,
                          api_version=api_version,
                          is_security_enabled=params.security_enabled,
                          is_stack_supports_ranger_kerberos=params.stack_supports_ranger_kerberos,
                          component_user_principal=params.ranger_storm_principal if params.security_enabled else None,
                          component_user_keytab=params.ranger_storm_keytab if params.security_enabled else None)
    else:
      from resource_management.libraries.functions.setup_ranger_plugin import setup_ranger_plugin
      setup_ranger_plugin('storm-nimbus', 'storm', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java64_home,
                          params.repo_name, params.storm_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_storm,
                          conf_dict=params.conf_dir,
                          component_user=params.storm_user,
                          component_group=params.user_group,
                          cache_service_list=['storm'],
                          plugin_audit_properties=params.config['configurations']['ranger-storm-audit'],
                          plugin_audit_attributes=params.config['configuration_attributes']['ranger-storm-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-storm-security'],
                          plugin_security_attributes=params.config['configuration_attributes']['ranger-storm-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-storm-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-storm-policymgr-ssl'],
                          component_list=['storm-client', 'storm-nimbus'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override=stack_version,
                          skip_if_rangeradmin_down=not params.retryAble)

    site_files_create_path = format('{storm_component_home_dir}/extlib-daemon/ranger-storm-plugin-impl/conf')
    Directory(site_files_create_path,
              owner=params.storm_user,
              group=params.user_group,
              mode=0775,
              create_parents=True,
              cd_access='a')

    if params.stack_supports_core_site_for_ranger_plugin and params.enable_ranger_storm and params.security_enabled:
      if params.has_namenode:
        Logger.info("Stack supports core-site.xml creation for Ranger plugin and NameNode is installed, creating core-site.xml from namenode configurations")
        setup_core_site_for_required_plugins(component_user=params.storm_user,
                                             component_group=params.user_group,
                                             create_core_site_path=site_files_create_path,
                                             configurations=params.config['configurations']['core-site'],
                                             configuration_attributes=params.config['configuration_attributes']['core-site'])
      else:
        Logger.info("Stack supports core-site.xml creation for Ranger plugin and NameNode is not installed, creating core-site.xml from default configurations")
        setup_core_site_for_required_plugins(component_user=params.storm_user,
                                             component_group=params.user_group,
                                             create_core_site_path=site_files_create_path,
                                             configurations={'hadoop.security.authentication': 'kerberos' if params.security_enabled else 'simple'},
                                             configuration_attributes={})

      if len(params.namenode_hosts) > 1:
        Logger.info('Ranger Storm plugin is enabled along with security and NameNode is HA, creating hdfs-site.xml')
        XmlConfig("hdfs-site.xml",
                  conf_dir=site_files_create_path,
                  configurations=params.config['configurations']['hdfs-site'],
                  configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
                  owner=params.storm_user,
                  group=params.user_group,
                  mode=0644)
      else:
        Logger.info('Ranger Storm plugin is not enabled or security is disabled, removing hdfs-site.xml')
        File(format('{site_files_create_path}/hdfs-site.xml'), action="delete")
    else:
      Logger.info("Stack does not support core-site.xml creation for Ranger plugin, skipping core-site.xml configurations")
  else:
    Logger.info('Ranger Storm plugin is not enabled')
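# setup_core_site_for_required_plugins is called above but defined elsewhere. A minimal
# sketch of what such a helper plausibly does, assuming it simply renders core-site.xml
# via XmlConfig into the plugin conf dir; the function name and parameter handling here
# mirror the call sites above and are not the actual implementation.
def setup_core_site_for_required_plugins_sketch(component_user, component_group,
                                                create_core_site_path,
                                                configurations, configuration_attributes):
  XmlConfig("core-site.xml",
            conf_dir=create_core_site_path,
            configurations=configurations,
            configuration_attributes=configuration_attributes,
            owner=component_user,
            group=component_group,
            mode=0644)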
def init_kafka_acls(self):
    Logger.info('Creating Kafka ACLs for PCAP')
    metron_service.init_kafka_acls(self.__params, self.__get_topics())
    metron_service.init_kafka_acl_groups(self.__params, self.__get_kafka_acl_groups())
def stop_enrichment_topology(self):
    Logger.info('Stopping ' + self.__enrichment_topology)
    stop_cmd = 'storm kill ' + self.__enrichment_topology
    Execute(stop_cmd, user=self.__params.metron_user)
    Logger.info('Done stopping enrichment topologies')
def pre_upgrade_restart(self, env):
    Logger.info("Executing superset Upgrade pre-restart")
    import params
    env.set_params(params)
def reloadproxyusers(self, env):
    import params
    env.set_params(params)
    Logger.info("RELOAD HDFS PROXY USERS")
    refreshProxyUsers()
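# refreshProxyUsers() above is defined elsewhere in this module; conceptually it wraps the
# stock `hdfs dfsadmin -refreshSuperUserGroupsConfiguration` CLI, which reloads proxy-user
# settings from core-site.xml without a restart. A hedged sketch, assuming params.hdfs_user
# is the right account and no HA-specific -fs handling is needed:
def refresh_proxy_users_sketch():
    import params
    Execute("hdfs dfsadmin -refreshSuperUserGroupsConfiguration",
            user=params.hdfs_user,
            logoutput=True)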
def set_geo_configured(self):
    Logger.info("Setting GEO Configured to True")
    File(self.__params.enrichment_geo_configured_flag_file,
         content="",
         owner=self.__params.metron_user,
         mode=0755)
def pre_upgrade_restart(self, env, upgrade_type=None):
    Logger.info("Executing Stack Upgrade pre-restart")
    import params
    env.set_params(params)
    stack_select.select_packages(params.version)
def get_stack_feature_version(config):
  """
  Uses the specified ConfigDictionary to determine which version to use for stack feature checks.
  Normally, commandParams/version is the correct value to use as it represents the 4-digit
  exact stack version/build being upgraded or downgraded to.

  However, there are cases where the commands being sent are to stop running services which are
  on a different stack version from the version being upgraded/downgraded to. As a result, the
  configurations sent for these specific stop commands do not match commandParams/version.
  :param config: a ConfigDictionary instance to extract the hostLevelParams and commandParams from.
  :return: the version to use when checking stack features.
  """
  from resource_management.libraries.functions.default import default

  if "hostLevelParams" not in config or "commandParams" not in config:
    raise Fail("Unable to determine the correct version since hostLevelParams and commandParams were not present in the configuration dictionary")

  # should always be there
  stack_version = config['hostLevelParams']['stack_version']

  # something like 2.4.0.0-1234; represents the version for the command
  # (or None if this is a cluster install and it hasn't been calculated yet)
  # this is always guaranteed to be the correct version for the command, even in
  # upgrade and downgrade scenarios
  command_version = default("/commandParams/version", None)
  command_stack = default("/commandParams/target_stack", None)

  # something like 2.4.0.0-1234
  # (or None if this is a cluster install and it hasn't been calculated yet)
  current_cluster_version = default("/hostLevelParams/current_version", None)

  # UPGRADE or DOWNGRADE (or None)
  upgrade_direction = default("/commandParams/upgrade_direction", None)

  # start out with the value that's right 99% of the time
  version_for_stack_feature_checks = command_version if command_version is not None else stack_version

  # if this is not an upgrade, then we take the simple path
  if upgrade_direction is None:
    Logger.info(
      "Stack Feature Version Info: Cluster Stack={0}, Cluster Current Version={1}, Command Stack={2}, Command Version={3} -> {4}".format(
        stack_version, current_cluster_version, command_stack, command_version,
        version_for_stack_feature_checks))

    return version_for_stack_feature_checks

  # STOP commands are the trouble maker as they are intended to stop a service not on the
  # version of the stack being upgraded/downgraded to
  is_stop_command = _is_stop_command(config)
  if not is_stop_command:
    Logger.info(
      "Stack Feature Version Info: Cluster Stack={0}, Cluster Current Version={1}, Command Stack={2}, Command Version={3}, Upgrade Direction={4} -> {5}".format(
        stack_version, current_cluster_version, command_stack, command_version,
        upgrade_direction, version_for_stack_feature_checks))

    return version_for_stack_feature_checks

  # something like 2.5.0.0-5678 (or None)
  downgrade_from_version = default("/commandParams/downgrade_from_version", None)

  # guaranteed to have a STOP command now during an UPGRADE/DOWNGRADE, check direction
  if upgrade_direction.lower() == Direction.DOWNGRADE.lower():
    if downgrade_from_version is None:
      Logger.warning(
        "Unable to determine the version being downgraded when stopping services, using {0}".format(
          version_for_stack_feature_checks))
    else:
      version_for_stack_feature_checks = downgrade_from_version
  else:
    # UPGRADE
    if current_cluster_version is not None:
      version_for_stack_feature_checks = current_cluster_version
    else:
      version_for_stack_feature_checks = command_version if command_version is not None else stack_version

  Logger.info(
    "Stack Feature Version Info: Cluster Stack={0}, Cluster Current Version={1}, Command Stack={2}, Command Version={3}, Upgrade Direction={4}, stop_command={5} -> {6}".format(
      stack_version, current_cluster_version, command_stack, command_version,
      upgrade_direction, is_stop_command, version_for_stack_feature_checks))

  return version_for_stack_feature_checks
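# _is_stop_command is used above but not shown in this file. A minimal sketch, assuming the
# Ambari command JSON exposes roleCommand at the top level and custom_command under
# hostLevelParams; these field names are assumptions inferred from the config paths used
# above, not confirmed by this file.
def _is_stop_command_sketch(config):
  from resource_management.libraries.functions.default import default
  role_command = default("/roleCommand", None)
  custom_command = default("/hostLevelParams/custom_command", None)
  # A stop is either a direct STOP role command or a CUSTOM_COMMAND whose payload is STOP.
  return role_command == "STOP" or (role_command == "CUSTOM_COMMAND" and custom_command == "STOP")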
def setup_ranger_hive():
  import params

  if params.has_ranger_admin:
    File(params.ranger_downloaded_custom_connector,
         content=DownloadSource(params.ranger_driver_curl_source))

    if not os.path.isfile(params.ranger_driver_curl_target):
      Execute(('cp', '--remove-destination', params.ranger_downloaded_custom_connector, params.ranger_driver_curl_target),
              path=["/bin", "/usr/bin/"],
              sudo=True)

    try:
      command = 'hdp-select status hive-server2'
      return_code, hdp_output = shell.call(command, timeout=20)
    except Exception, e:
      Logger.error(str(e))
      raise Fail('Unable to execute hdp-select command to retrieve the version.')

    if return_code != 0:
      raise Fail('Unable to determine the current version because of a non-zero return code of {0}'.format(str(return_code)))

    hdp_version = re.sub('hive-server2 - ', '', hdp_output).strip()
    match = re.match('[0-9]+.[0-9]+.[0-9]+.[0-9]+-[0-9]+', hdp_version)

    if match is None:
      raise Fail('Failed to get extracted version')

    file_path = '/usr/hdp/' + hdp_version + '/ranger-hive-plugin/install.properties'
    if not os.path.isfile(file_path):
      raise Fail('Ranger Hive plugin install.properties file does not exist at {0}'.format(file_path))

    ranger_hive_dict = ranger_hive_properties()
    hive_repo_data = hive_repo_properties()

    write_properties_to_file(file_path, ranger_hive_dict)

    if params.enable_ranger_hive:
      cmd = format('cd /usr/hdp/{hdp_version}/ranger-hive-plugin/ && sh enable-hive-plugin.sh')
      ranger_adm_obj = Rangeradmin(url=ranger_hive_dict['POLICY_MGR_URL'])
      response_code, response_received = ranger_adm_obj.check_ranger_login_urllib2(
        ranger_hive_dict['POLICY_MGR_URL'] + '/login.jsp', 'test:test')

      if response_code is not None and response_code == 200:
        ambari_ranger_admin, ambari_ranger_password = ranger_adm_obj.create_ambari_admin_user(
          params.ambari_ranger_admin, params.ambari_ranger_password, params.admin_uname_password)
        ambari_username_password_for_ranger = ambari_ranger_admin + ':' + ambari_ranger_password

        if ambari_ranger_admin != '' and ambari_ranger_password != '':
          repo = ranger_adm_obj.get_repository_by_name_urllib2(
            ranger_hive_dict['REPOSITORY_NAME'], 'hive', 'true', ambari_username_password_for_ranger)

          if repo and repo['name'] == ranger_hive_dict['REPOSITORY_NAME']:
            Logger.info('Hive Repository exists')
          else:
            response = ranger_adm_obj.create_repository_urllib2(
              hive_repo_data, ambari_username_password_for_ranger, params.policy_user)

            if response is not None:
              Logger.info('Hive Repository created in Ranger Admin')
            else:
              Logger.info('Hive Repository creation failed in Ranger Admin')
        else:
          Logger.info('Ambari admin username and password are blank')
      else:
        Logger.info('Ranger service is not started on given host')
    else:
      cmd = format('cd /usr/hdp/{hdp_version}/ranger-hive-plugin/ && sh disable-hive-plugin.sh')

    Execute(cmd,
            environment={'JAVA_HOME': params.java64_home},
            logoutput=True)
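# write_properties_to_file is used above to push the computed Ranger dictionary into
# install.properties, but its definition is not shown here. A minimal self-contained
# sketch, assuming a flat KEY=VALUE properties format where existing keys are overwritten
# in place and keys absent from the map are left untouched; the function name is
# illustrative, not the real helper.
def write_properties_to_file_sketch(file_path, value_map):
  lines = []
  with open(file_path, 'r') as fp:
    for line in fp:
      stripped = line.strip()
      # Rewrite only non-comment lines whose key appears in the supplied map.
      if '=' in stripped and not stripped.startswith('#'):
        key = stripped.split('=', 1)[0].strip()
        if key in value_map:
          line = '{0}={1}\n'.format(key, value_map[key])
      lines.append(line)
  with open(file_path, 'w') as fp:
    fp.writelines(lines)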
def reload_configs(self, env):
    import params
    env.set_params(params)
    Logger.info("RELOAD CONFIGS")
    reconfig("namenode", params.namenode_address)
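# reconfig() above is defined elsewhere; it triggers HDFS's live-reconfiguration feature.
# A sketch of what such a helper might run, based on the stock
# `hdfs dfsadmin -reconfig <namenode|datanode> <host:ipc_port> start` CLI; the function
# name and the absence of a status-polling loop here are simplifying assumptions.
def reconfig_sketch(component_name, component_address):
    import params
    # Ask the daemon to re-read its reconfigurable properties from the config files on disk.
    Execute(format("hdfs dfsadmin -reconfig {component_name} {component_address} start"),
            user=params.hdfs_user,
            logoutput=True)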