def start(self, env, upgrade_type=None):
    """
    Start the Atlas metadata server.

    Runs Ranger-plugin setup, one-time HBase table and Kafka topic setup
    (kerberized variants when security is enabled), then launches the daemon.
    On failure, daemon logs are surfaced before the error is re-raised.
    """
    import params
    env.set_params(params)
    self.configure(env)

    daemon_cmd = format(
        'source {params.conf_dir}/atlas-env.sh ; {params.metadata_start_script}'
    )
    # Guard: skip the start when a live process matches the recorded pid.
    no_op_test = format(
        'ls {params.pid_file} >/dev/null 2>&1 && ps -p `cat {params.pid_file}` >/dev/null 2>&1'
    )
    atlas_hbase_setup_command = format(
        "cat {atlas_hbase_setup} | hbase shell -n")
    atlas_kafka_setup_command = format("bash {atlas_kafka_setup}")
    secure_atlas_hbase_setup_command = format(
        "kinit -kt {hbase_user_keytab} {hbase_principal_name}; "
    ) + atlas_hbase_setup_command
    # in case if principal was distributed across several hosts, pattern need to be replaced to right one
    secure_atlas_kafka_setup_command = format(
        "kinit -kt {kafka_keytab} {kafka_principal_name}; ").replace(
            "_HOST", params.hostname) + atlas_kafka_setup_command

    if params.stack_supports_atlas_ranger_plugin:
        Logger.info('Atlas plugin is enabled, configuring Atlas plugin.')
        setup_ranger_atlas(upgrade_type=upgrade_type)
    else:
        Logger.info('Atlas plugin is not supported or enabled.')

    try:
        effective_version = get_stack_feature_version(params.config)

        if check_stack_feature(StackFeature.ATLAS_HBASE_SETUP,
                               effective_version):
            # HBase setup runs either kerberized (on an HBase master host) or
            # plain (when Ranger-HBase is enabled without Kerberos).
            if params.security_enabled and params.has_hbase_master:
                Execute(secure_atlas_hbase_setup_command,
                        tries=5,
                        try_sleep=10,
                        user=params.hbase_user)
            elif params.enable_ranger_hbase and not params.security_enabled:
                Execute(atlas_hbase_setup_command,
                        tries=5,
                        try_sleep=10,
                        user=params.hbase_user)

        if check_stack_feature(
                StackFeature.ATLAS_UPGRADE_SUPPORT,
                effective_version) and params.security_enabled:
            try:
                Execute(secure_atlas_kafka_setup_command,
                        user=params.kafka_user,
                        tries=5,
                        try_sleep=10)
            except Fail:
                pass  # do nothing and do not block Atlas start, fail logs would be available via Execute internals

        Execute(daemon_cmd, user=params.metadata_user, not_if=no_op_test)
    except:
        # Surface the daemon logs before propagating the failure.
        show_logs(params.log_dir, params.metadata_user)
        raise
def service_check(self, env):
    """
    Smoke-test every Atlas host over HTTP (kinit first when kerberized).

    Counts hosts whose probe failed and raises Fail when no host responded;
    previously the counter was incremented but never checked, so the service
    check always reported success.
    """
    import params
    env.set_params(params)

    if params.security_enabled:
        Execute(format(
            "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}"
        ), user=params.smoke_test_user)

    atlas_host_call_count = 0

    for atlas_host in params.atlas_hosts:
        if params.security_enabled:
            smoke_cmd = format(
                'curl -k --negotiate -u : -b ~/cookiejar.txt -c ~/cookiejar.txt -s -o /dev/null -w "%{{http_code}}" {metadata_protocol}://{atlas_host}:{metadata_port}/'
            )
        else:
            smoke_cmd = format(
                'curl -k -s -o /dev/null -w "%{{http_code}}" {metadata_protocol}://{atlas_host}:{metadata_port}/'
            )
        try:
            Execute(smoke_cmd,
                    user=params.smoke_test_user,
                    tries=5,
                    try_sleep=10)
        except Exception as err:  # fix: Py3-compatible (was `except Exception, err`)
            atlas_host_call_count = atlas_host_call_count + 1
            Logger.error(
                "ATLAS service check failed for host {0} with error {1}".
                format(atlas_host, err))

    # Fix: fail the service check when every host failed to respond.
    if atlas_host_call_count == len(params.atlas_hosts):
        raise Fail("All instances of ATLAS METADATA SERVER are down.")
def install_airflow(self, env):
    """Install Airflow 1.10.12 under the Anaconda python and initialize its DB."""
    import params
    env.set_params(params)

    # Shell steps, in dependency order: build tools, pinned Qt deps, Airflow.
    setup_commands = (
        'yum groupinstall -y "Development Tools"',
        "/opt/anaconda/bin/pip3 install 'pyqtwebengine<5.13' --force-reinstall",
        '/opt/anaconda/bin/pip3 install "pyqt5<5.13" --force-reinstall',
        '/opt/anaconda/bin/pip3 install apache-airflow==1.10.12 --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.8.txt"',
    )
    for shell_cmd in setup_commands:
        Execute(shell_cmd)

    Directory(params.airflow_home_directory,
              create_parents=True,
              owner='root',
              group='root',
              mode=0o0755)

    Execute('export AIRFLOW_HOME={0} && /opt/anaconda/bin/airflow initdb'.format(
        params.airflow_home_directory))

    try:
        self.configure_airflow(env)
    except ExecutionFailed as ef:
        print("Error {0}".format(ef))
        return
def stop(self, env, upgrade_type=None):
    """Stop the Atlas metadata server and clear its pid file."""
    import params
    env.set_params(params)
    stop_cmd = format('source {params.conf_dir}/atlas-env.sh; {params.metadata_stop_script}')
    Execute(stop_cmd, user=params.metadata_user)
    Execute(format("rm -f {params.pid_file}"))
def start(self, env):
    """Restart Grafana (stop, then start) and register the AMS datasource."""
    import params
    env.set_params(params)
    self.configure(env, action='start')

    # Stop any running instance first so the start is a clean restart.
    halt_cmd = format("{ams_grafana_script} stop")
    Execute(halt_cmd, user=params.ams_user)

    launch_cmd = format("{ams_grafana_script} start")
    Execute(launch_cmd, user=params.ams_user)

    create_ams_datasource()
def stop(self, env, rolling_restart=False):
    """Stop the metadata daemon and remove its pid file."""
    import params
    env.set_params(params)
    stop_cmd = format(
        'source {params.conf_dir}/metadata-env.sh; {params.metadata_stop_script}')
    Execute(stop_cmd, user=params.metadata_user)
    Execute(format("rm -f {params.pid_file}"))
def new_cached_exec(key, file_path, kinit_path, temp_dir, exec_user,
                    keytab_file, principal, hostname):
    """
    Entry point of an actual execution - triggered when timeout on the cache
    expired or on fresh execution.

    Runs kinit into a temporary credential cache owned by exec_user, then
    records the successful execution time under `key` in the JSON cache
    at file_path. The temp cache file is always deleted.
    """
    now = datetime.now()
    temp_kinit_cache_fd, temp_kinit_cache_filename = mkstemp(dir=temp_dir)
    # _HOST in the principal is expanded to this host's name.
    command = "%s -c %s -kt %s %s" % \
              (kinit_path, temp_kinit_cache_filename, keytab_file,
               principal.replace("_HOST", hostname))
    os.close(temp_kinit_cache_fd)
    try:
        # Ensure the proper user owns this file.
        # Fix: 0o600 instead of the Python2-only octal literal 0600, so the
        # module also parses under Python 3 (the value is unchanged).
        File(temp_kinit_cache_filename, owner=exec_user, mode=0o600)
        # Execute the kinit
        Execute(command, user=exec_user)
        with open(file_path, 'w+') as cache_file:
            result = {key: {"last_successful_execution": str(now)}}
            json.dump(result, cache_file)
    finally:
        File(temp_kinit_cache_filename, action='delete')
def stop(self, env, upgrade_type=None):
    """Stop the Atlas daemon, surfacing logs on failure, then delete the pid file."""
    import params
    env.set_params(params)

    # If the pid dir doesn't exist, this means either
    # 1. The user just added Atlas service and issued a restart command (stop+start). So stop should be a no-op
    # since there's nothing to stop.
    # OR
    # 2. The user changed the value of the pid dir config and incorrectly issued a restart command.
    # In which case the stop command cannot do anything since Ambari doesn't know which process to kill.
    # The start command will spawn another instance.
    # The user should have issued a stop, changed the config, and then started it.
    if not os.path.isdir(params.pid_dir):
        Logger.info(
            "*******************************************************************"
        )
        Logger.info(
            "Will skip the stop command since this is the first time stopping/restarting Atlas "
            "and the pid dir does not exist, %s\n" % params.pid_dir)
        return

    stop_cmd = format(
        'source {params.conf_dir}/atlas-env.sh; {params.metadata_stop_script}')
    try:
        Execute(stop_cmd, user=params.metadata_user)
    except:
        show_logs(params.log_dir, params.metadata_user)
        raise

    File(params.pid_file, action="delete")
def start(self, env, upgrade_type=None):
    """Start Grafana, then provision the admin password, datasource and dashboards."""
    import params
    env.set_params(params)
    self.configure(env, action='start')

    launch_cmd = format("{ams_grafana_script} start")
    Execute(launch_cmd,
            user=params.ams_user,
            not_if=params.grafana_process_exists_cmd)

    pidfile = format("{ams_grafana_pid_dir}/grafana-server.pid")
    if sudo.path_exists(pidfile):
        Logger.info("Grafana Server has started with pid: {0}".format(
            sudo.read_file(pidfile).strip()))
    else:
        Logger.warning(
            "Pid file doesn't exist after starting of the component.")

    #Set Grafana admin pwd
    create_grafana_admin_pwd()
    # Create datasource
    create_ams_datasource()
    # Create pre-built dashboards
    create_ams_dashboards()
def stop(self, env):
    """Stop Grafana via its control script, but only if the process is running."""
    import params
    env.set_params(params)
    self.configure(env, action='stop')
    script_path = format("{ams_grafana_script}")
    Execute((script_path, 'stop'),
            sudo=True,
            only_if=params.grafana_process_exists_cmd)
def service_check(self, env):
    """Verify the service by polling `hst status`, retrying up to three times."""
    import params
    env.set_params(params)
    Execute(
        "hst status",
        logoutput=True,
        tries=3,
        try_sleep=20,
    )
def start(self, env, upgrade_type=None):
    """Configure and launch Elasticsearch as a daemon, then let it boot."""
    import params
    if env is not None:
        env.set_params(params)
    # configure
    self.configure(env)
    launch_cmd = "%s -d -p %s" % (params.elasticSearchMainCmd,
                                  params.elasticSearchPidFile)
    Logger.info("Start: %s" % launch_cmd)
    Execute(launch_cmd, user=params.elasticSearchUser)
    # Give the JVM a moment to come up before callers probe it.
    time.sleep(10)
def start(self, env, rolling_restart=False):
    """Start the metadata server on its configured port unless already running."""
    import params
    env.set_params(params)
    launch_cmd = format(
        'source {params.conf_dir}/metadata-env.sh ; {params.metadata_start_script} --port {params.metadata_port}'
    )
    # Skip the launch when a live process matches the recorded pid.
    already_running = format(
        'ls {params.pid_file} >/dev/null 2>&1 && ps -p `cat {params.pid_file}` >/dev/null 2>&1'
    )
    Execute(launch_cmd, user=params.metadata_user, not_if=already_running)
def service_check(self, env):
    """
    Run the Atlas smoke command (kinit first when kerberized).

    The check is deliberately best-effort: a failed smoke command is only
    logged at debug level and does not raise.
    """
    import params
    env.set_params(params)

    if params.security_enabled:
        Execute(format(
            "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}"
        ), user=params.smoke_test_user)

    try:
        Execute(params.smoke_cmd,
                user=params.smoke_test_user,
                tries=5,
                try_sleep=10)
        Logger.info('Atlas server up and running')
    except Exception:
        # Fix: narrowed the bare `except:` so SystemExit/KeyboardInterrupt are
        # no longer swallowed; smoke failures remain non-fatal as before.
        Logger.debug('Atlas server not running')
def configure_ac(self, env):
    """Render the Jupyter port-forward systemd unit and reload systemd."""
    import params
    env.set_params(params)
    unit_path = "/etc/systemd/system/jupyter_portforward.service"
    unit_content = Template("portforward_service_jupyter.j2",
                            configurations=params)
    File(unit_path,
         content=unit_content,
         owner=params.anaconda_user,
         group=params.anaconda_group,
         mode=0o0600)
    Execute('sudo systemctl daemon-reload')
def start(self, env, upgrade_type=None):
    """Start the Atlas metadata server unless a live pid already exists."""
    import params
    env.set_params(params)
    self.configure(env)
    launch_cmd = format(
        'source {params.conf_dir}/atlas-env.sh ; {params.metadata_start_script}')
    already_running = format(
        'ls {params.pid_file} >/dev/null 2>&1 && ps -p `cat {params.pid_file}` >/dev/null 2>&1')
    Execute(launch_cmd, user=params.metadata_user, not_if=already_running)
def configure_ac(self, env):
    """Render the Airflow port-forward systemd unit and reload systemd."""
    import params
    env.set_params(params)
    print("!!!!!!!!!!!!!!!!!!!!!!!creating file!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    unit_path = "/etc/systemd/system/airflow_portforward.service"
    unit_content = Template("portforward_service.j2", configurations=params)
    File(unit_path,
         content=unit_content,
         owner='root',
         group='root',
         mode=0o0600)
    Execute('sudo systemctl daemon-reload')
def start(self, env):
    """Start Grafana, then create the AMS datasource and pre-built dashboards."""
    import params
    env.set_params(params)
    self.configure(env, action='start')
    launch_cmd = format("{ams_grafana_script} start")
    Execute(launch_cmd, user=params.ams_user)
    # Create datasource
    create_ams_datasource()
    # Create pre-built dashboards
    create_ams_dashboards()
def start(self, env, upgrade_type=None):
    """
    Start the Atlas metadata server (stack-version variant).

    Optionally configures the Ranger plugin and runs the one-time HBase
    setup before launching the daemon; on failure, daemon logs are surfaced
    and the error is re-raised.
    """
    import params
    env.set_params(params)
    self.configure(env)

    daemon_cmd = format('source {params.conf_dir}/atlas-env.sh ; {params.metadata_start_script}')
    # Guard: skip the start when a live process matches the recorded pid.
    no_op_test = format('ls {params.pid_file} >/dev/null 2>&1 && ps -p `cat {params.pid_file}` >/dev/null 2>&1')
    atlas_hbase_setup_command = format("cat {atlas_hbase_setup} | hbase shell -n")
    secure_atlas_hbase_setup_command = format("kinit -kt {hbase_user_keytab} {hbase_principal_name}; ") + atlas_hbase_setup_command

    if params.stack_supports_atlas_ranger_plugin:
        Logger.info('Atlas plugin is enabled, configuring Atlas plugin.')
        setup_ranger_atlas(upgrade_type=upgrade_type)
    else:
        Logger.info('Atlas plugin is not supported or enabled.')

    try:
        # During an upgrade the target stack version applies; otherwise the
        # currently formatted stack version.
        effective_version = format_stack_version(params.version) if upgrade_type is not None else params.stack_version_formatted

        if check_stack_feature(StackFeature.ATLAS_HBASE_SETUP, effective_version):
            # HBase setup runs either kerberized (on an HBase master host) or
            # plain (when Ranger-HBase is enabled without Kerberos).
            if params.security_enabled and params.has_hbase_master:
                Execute(secure_atlas_hbase_setup_command,
                        tries = 5,
                        try_sleep = 10,
                        user=params.hbase_user
                        )
            elif params.enable_ranger_hbase and not params.security_enabled:
                Execute(atlas_hbase_setup_command,
                        tries = 5,
                        try_sleep = 10,
                        user=params.hbase_user
                        )

        Execute(daemon_cmd,
                user=params.metadata_user,
                not_if=no_op_test
                )
    except:
        # Surface the daemon logs before propagating the failure.
        show_logs(params.log_dir, params.metadata_user)
        raise
def install_ac(self, env):
    """
    Install Anaconda under /opt/anaconda, ensure its bin directory is on
    root's PATH, register a systemd unit that runs Jupyter Lab, then delegate
    to configure_ac().
    """
    import params
    env.set_params(params)
    # systemd unit for Jupyter Lab, pointing at the rendered notebook config.
    # NOTE(review): reconstructed line breaks of this unit-file literal from a
    # whitespace-mangled source — confirm against the original file.
    filestr = """[Unit]
Description=Jupyter-Notebook service
After=network.target
StartLimitIntervalSec=0
[Service]
Type=simple
Restart=always
RestartSec=1
User=root
ExecStart=/opt/anaconda/bin/jupyter-lab --config {0}jupyter_notebook_config.py
[Install]
WantedBy=multi-user.target""".format(params.config_dir)

    if 'anaconda' in os.listdir("/opt"):
        print("already installed")
    else:
        Execute(
            "curl -o /tmp/anaconda.sh https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh"
        )
        Execute("bash /tmp/anaconda.sh -b -p /opt/anaconda")
        # NOTE(review): this export only affects the spawned shell, not later
        # processes — presumably relies on the .bash_profile edit below.
        Execute("export PATH=$PATH:/opt/anaconda/bin/")
        Execute('rm -f /opt/anaconda.sh')

    path = os.environ['PATH']
    binary_directory = '/opt/anaconda/bin'
    print(path)
    if binary_directory not in path:
        print("Not in path")
        new_path = 'PATH=$PATH:' + binary_directory + "\n"
        with open('/root/.bash_profile') as f:
            text = f.readlines()
        if new_path not in text:
            # Insert ahead of the trailing lines of .bash_profile.
            text.insert(-2, new_path)
        text = "".join(text)
        with open('/root/.bash_profile', 'w') as f:
            f.write(text)
        print(text)
    else:
        print('already in path')
    Execute('source /root/.bash_profile')

    if 'jupyter' in os.listdir("/opt"):
        print("directory exists")
    else:
        Directory(params.config_dir, create_parents=True)

    Execute(
        'echo "{0}" > /etc/systemd/system/jupyter.service'.format(filestr))
    Execute('sudo systemctl daemon-reload')
    self.configure_ac(env)
def start(self, env, upgrade_type=None):
    """Launch Kibana in the background after clearing its previous log file."""
    import params
    if env is not None:
        env.set_params(params)
    # configure
    self.configure(env)
    if os.path.exists(params.kibanaLogFile):
        Utils.remove(params.kibanaLogFile)
    launch_cmd = "nohup %s > /dev/null 2>&1 < /dev/null &" % (
        params.kibanaBinFile, )
    Logger.info("Start: %s" % launch_cmd)
    Execute(launch_cmd, user=params.kibanaUser)
    # Give the server a moment to come up before callers probe it.
    time.sleep(10)
def stop(self, env, upgrade_type=None):
    """Stop the Atlas daemon; surface logs and re-raise on failure, then drop the pid file."""
    import params
    env.set_params(params)
    stop_cmd = format(
        'source {params.conf_dir}/atlas-env.sh; {params.metadata_stop_script}')
    try:
        Execute(stop_cmd, user=params.metadata_user)
    except:
        show_logs(params.log_dir, params.metadata_user)
        raise
    File(params.pid_file, action="delete")
def install(self, env):
    """Install packages, then run the bundled conda setup script from /tmp."""
    self.configure(env)
    self.install_packages(env)
    package_dir = helpers.package_dir()
    files_dir = os.path.join(package_dir, 'files')
    scripts_dir = os.path.join(package_dir, 'scripts')
    anaconda_setup_sh = os.path.join(scripts_dir, 'shell', 'conda_setup.sh')
    # Single setup step; run from /tmp so relative work files land there.
    Execute('cd /tmp; sh ' + anaconda_setup_sh + ' ' + files_dir)
def remove_stack_folder(self, structured_output, version):
    """
    Remove the given stack version's folder under stack_root.

    On failure, records an error entry in structured_output and publishes it.
    Fixes two defects: the original used `finally`, so the failure message was
    written even when removal succeeded; and `rm -f` cannot remove a
    directory, so the folder was never actually deleted.
    """
    if version and version != '' and stack_root and stack_root != '':
        Logger.info("Removing {0}/{1}".format(stack_root, version))
        try:
            # -rf: the target is a directory tree.
            # NOTE(review): path is built by plain concatenation — assumes
            # stack_root ends with '/'; confirm against the callers.
            Execute(('rm', '-rf', stack_root + version), sudo=True)
        except Exception:
            structured_output["remove_previous_stacks"] = {
                "exit_code": -1,
                "message": "Failed to remove version {0}{1}".format(
                    stack_root, version)
            }
            self.put_structured_out(structured_output)
def index_document(self, host, port, doc='{"name": "Ambari Service Check"}', index="ambari_service_check"):
    """
    Tests the health of Elasticsearch by indexing a document.

    :param host: The name of a host running Elasticsearch.
    :param port: The Elasticsearch HTTP port.
    :param doc: The test document to put.
    :param index: The name of the test index.
    """
    # put a document into a new index
    Execute("curl -XPUT 'http://%s:%s/%s/test/1' -d '%s'" %
            (host, port, index, doc), logoutput=True)

    # retrieve the document... use subprocess because we actually need the results here.
    # NOTE(review): shell=True with interpolated host/port/index — trusted
    # cluster config only; do not feed untrusted input here.
    cmd_retrieve = "curl -XGET 'http://%s:%s/%s/test/1'" % (host, port, index)
    proc = subprocess.Popen(cmd_retrieve,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)
    (stdout, stderr) = proc.communicate()
    response_retrieve = stdout
    Logger.info("Retrieval response is: %s" % response_retrieve)
    expected_retrieve = '{"_index":"%s","_type":"test","_id":"1","_version":1,"found":true,"_source":%s}' \
        % (index, doc)

    # delete the test index
    cmd_delete = "curl -XDELETE 'http://%s:%s/%s'" % (host, port, index)
    proc = subprocess.Popen(cmd_delete,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)
    (stdout, stderr) = proc.communicate()
    response_delete = stdout
    # Fix: log the delete response (previously re-logged the retrieval response).
    Logger.info("Delete index response is: %s" % response_delete)
    expected_delete = '{"acknowledged":true}'

    if (expected_retrieve == response_retrieve) and \
            (expected_delete == response_delete):
        Logger.info("Successfully indexed document in Elasticsearch")
    else:
        Logger.info("Unable to retrieve document from Elasticsearch")
        sys.exit(1)
def check_cluster_health(self, host, port, status="green", timeout="120s"):
    """
    Checks Elasticsearch cluster health. Will wait for a given health state to be reached.
    :param host: The name of a host running Elasticsearch.
    :param port: The Elasticsearch HTTP port.
    :param status: The expected cluster health state. By default, green.
    :param timeout: How long to wait for the cluster. By default, 120 seconds.
    """
    Logger.info("Checking cluster health")
    # grep ensures the response actually reports the requested status.
    health_cmd = "curl -sS -XGET 'http://{0}:{1}/_cluster/health?wait_for_status={2}&timeout={3}' | grep '\"status\":\"{2}\"'".format(
        host, port, status, timeout)
    Execute(health_cmd, logoutput=True, tries=5, try_sleep=10)
def configure_airflow(self, env):
    """Render the Airflow webserver and scheduler systemd units, then reload systemd."""
    import params
    env.set_params(params)
    units = (
        ("/etc/systemd/system/airflow_webserver.service", "webserver.j2"),
        ("/etc/systemd/system/airflow_scheduler.service", "scheduler.j2"),
    )
    for unit_path, template_name in units:
        File(unit_path,
             content=Template(template_name, configurations=params),
             owner='root',
             group='root',
             mode=0o0600)
    Execute('sudo systemctl daemon-reload')
def new_cached_exec(key, file_path, kinit_path, exec_user, keytab_file,
                    principal, hostname):
    """
    Entry point of an actual execution - triggered when timeout on the cache
    expired or on fresh execution. Runs kinit as exec_user via su and records
    the execution time under `key` in the JSON cache at file_path.
    """
    execution_time = datetime.now()
    _, temp_kinit_cache_file = mkstemp()
    # _HOST in the principal is expanded to this host's name.
    resolved_principal = principal.replace("_HOST", hostname)
    command = "su -s /bin/bash - %s -c '%s -c %s -kt %s %s'" % \
              (exec_user, kinit_path, temp_kinit_cache_file, keytab_file,
               resolved_principal)
    try:
        Execute(command)
        with open(file_path, 'w+') as cache_file:
            json.dump({key: {"last_successful_execution": str(execution_time)}},
                      cache_file)
    finally:
        os.remove(temp_kinit_cache_file)
def start(self, env, upgrade_type=None):
    """Configure the Ranger plugin if supported, then start the Atlas daemon."""
    import params
    env.set_params(params)
    self.configure(env)

    launch_cmd = format(
        'source {params.conf_dir}/atlas-env.sh ; {params.metadata_start_script}')
    # Skip the launch when a live process matches the recorded pid.
    already_running = format(
        'ls {params.pid_file} >/dev/null 2>&1 && ps -p `cat {params.pid_file}` >/dev/null 2>&1')

    if params.stack_supports_atlas_ranger_plugin:
        Logger.info('Atlas plugin is enabled, configuring Atlas plugin.')
        setup_ranger_atlas(upgrade_type=upgrade_type)
    else:
        Logger.info('Atlas plugin is not supported or enabled.')

    try:
        Execute(launch_cmd, user=params.metadata_user, not_if=already_running)
    except:
        # Surface the daemon logs before propagating the failure.
        show_logs(params.log_dir, params.metadata_user)
        raise
def execute_db_connection_check(self, config): print "DB connection check started." # initialize needed data ambari_server_hostname = config['commandParams']['ambari_server_host'] check_db_connection_jar_name = "DBConnectionVerification.jar" jdk_location = config['commandParams']['jdk_location'] java64_home = config['commandParams']['java_home'] db_name = config['commandParams']['db_name'] if db_name == DB_MYSQL: jdbc_url = jdk_location + JDBC_DRIVER_SYMLINK_MYSQL jdbc_driver = JDBC_DRIVER_MYSQL jdbc_name = JDBC_DRIVER_SYMLINK_MYSQL elif db_name == DB_ORACLE: jdbc_url = jdk_location + JDBC_DRIVER_SYMLINK_ORACLE jdbc_driver = JDBC_DRIVER_ORACLE jdbc_name = JDBC_DRIVER_SYMLINK_ORACLE elif db_name == DB_POSTGRESQL: jdbc_url = jdk_location + JDBC_DRIVER_SYMLINK_POSTGRESQL jdbc_driver = JDBC_DRIVER_POSTGRESQL jdbc_name = JDBC_DRIVER_SYMLINK_POSTGRESQL db_connection_url = config['commandParams']['db_connection_url'] user_name = config['commandParams']['user_name'] user_passwd = config['commandParams']['user_passwd'] java_exec = os.path.join(java64_home, "bin", "java") if ('jdk_name' not in config['commandParams'] or config['commandParams']['jdk_name'] == None \ or config['commandParams']['jdk_name'] == '') and not os.path.isfile(java_exec): message = "Custom java is not available on host. Please install it. Java home should be the same as on server. 
" \ "\n" print message db_connection_check_structured_output = { "exit_code": "1", "message": message } return db_connection_check_structured_output environment = {"no_proxy": format("{ambari_server_hostname}")} artifact_dir = "/tmp/HDP-artifacts/" jdk_name = config['commandParams']['jdk_name'] jdk_curl_target = format("{artifact_dir}/{jdk_name}") java_dir = os.path.dirname(java64_home) # download DBConnectionVerification.jar from ambari-server resources if not os.path.isfile(java_exec): try: Execute(format( "mkdir -p {artifact_dir} ; curl -kf " "--retry 10 {jdk_location}/{jdk_name} -o {jdk_curl_target}" ), path=["/bin", "/usr/bin/"], environment=environment) except Exception, e: message = "Error downloading JDK from Ambari Server resources. Check network access to " \ "Ambari Server.\n" + str(e) print message db_connection_check_structured_output = { "exit_code": "1", "message": message } return db_connection_check_structured_output if jdk_name.endswith(".bin"): install_cmd = format("mkdir -p {java_dir} ; chmod +x {jdk_curl_target}; cd {java_dir} ; echo A | " \ "{jdk_curl_target} -noregister > /dev/null 2>&1") elif jdk_name.endswith(".gz"): install_cmd = format( "mkdir -p {java_dir} ; cd {java_dir} ; tar -xf {jdk_curl_target} > /dev/null 2>&1" ) try: Execute(install_cmd, path=["/bin", "/usr/bin/"]) except Exception, e: message = "Error installing java.\n" + str(e) print message db_connection_check_structured_output = { "exit_code": "1", "message": message } return db_connection_check_structured_output