def install_tez_jars():
  import params

  destination_hdfs_dirs = get_tez_hdfs_dir_paths(params.tez_lib_uris)

  # If tez libraries are to be stored in hdfs
  if destination_hdfs_dirs:
    for hdfs_dir in destination_hdfs_dirs:
      params.HdfsDirectory(hdfs_dir,
                           action="create_delayed",
                           owner=params.tez_user,
                           mode=0755)
      pass
    params.HdfsDirectory(None, action="create")

    if params.security_enabled:
      kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
    else:
      kinit_if_needed = ""

    if kinit_if_needed:
      Execute(kinit_if_needed, user=params.tez_user, path='/bin')
      pass

    app_dir_path = None
    lib_dir_path = None
    if len(destination_hdfs_dirs) > 1:
      for path in destination_hdfs_dirs:
        if 'lib' in path:
          lib_dir_path = path
        else:
          app_dir_path = path
        pass
      pass
    pass

    if app_dir_path:
      CopyFromLocal(params.tez_local_api_jars,
                    mode=0755,
                    owner=params.tez_user,
                    dest_dir=app_dir_path,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user)
      pass

    if lib_dir_path:
      CopyFromLocal(params.tez_local_lib_jars,
                    mode=0755,
                    owner=params.tez_user,
                    dest_dir=lib_dir_path,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user)
      pass
def create_hdfs_directories():
  import params

  params.HdfsDirectory("/tmp",
                       action="create_delayed",
                       owner=params.hdfs_user,
                       mode=0777)
  params.HdfsDirectory(params.smoke_hdfs_user_dir,
                       action="create_delayed",
                       owner=params.smoke_user,
                       mode=params.smoke_hdfs_user_mode)
  params.HdfsDirectory(None, action="create")
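# Illustrative sketch only (not part of the original recipes): the HdfsDirectory resource
# used above batches directory creation. Calls with action="create_delayed" only queue the
# directories; a final call with a path of None and action="create" flushes the queue in one
# pass. A minimal example of the calling pattern, assuming a params module that exposes
# HdfsDirectory and an hdfs_user (paths below are hypothetical):
def create_app_dirs_example():
  import params
  for d in ["/apps/example", "/apps/example/lib"]:  # hypothetical paths
    params.HdfsDirectory(d, action="create_delayed", owner=params.hdfs_user, mode=0755)
  params.HdfsDirectory(None, action="create")  # executes all queued creations at once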
def falcon(type, action=None):
  import params

  if action == 'config':
    Directory(params.falcon_pid_dir, owner=params.falcon_user)
    Directory(params.falcon_log_dir, owner=params.falcon_user)
    Directory(params.falcon_webapp_dir, owner=params.falcon_user)
    Directory(params.falcon_home, owner=params.falcon_user)
    Directory(params.falcon_conf_dir, owner=params.falcon_user, recursive=True)
    File(params.falcon_conf_dir + '/falcon-env.sh',
         content=InlineTemplate(params.falcon_env_sh_template))
    File(params.falcon_conf_dir + '/client.properties',
         content=Template('client.properties.j2'),
         mode=0644)
    PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                   properties=params.falcon_runtime_properties,
                   mode=0644)
    PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                   properties=params.falcon_startup_properties,
                   mode=0644)

  if type == 'server':
    if action == 'config':
      if params.store_uri[0:4] == "hdfs":
        params.HdfsDirectory(params.store_uri,
                             action="create_delayed",
                             owner=params.falcon_user,
                             mode=0755)
      params.HdfsDirectory(params.flacon_apps_dir,
                           action="create_delayed",
                           owner=params.falcon_user,
                           mode=0777)  # TODO change to proper mode
      params.HdfsDirectory(None, action="create")
      Directory(params.falcon_local_dir, owner=params.falcon_user, recursive=True)
      if params.falcon_embeddedmq_enabled == True:
        Directory(params.falcon_embeddedmq_data, owner=params.falcon_user, recursive=True)

    if action == 'start':
      Execute(format('{falcon_home}/bin/falcon-start -port {falcon_port}'),
              user=params.falcon_user,
              path=params.hadoop_bin_dir)

    if action == 'stop':
      Execute(format('{falcon_home}/bin/falcon-stop'),
              user=params.falcon_user,
              path=params.hadoop_bin_dir)
      File(params.server_pid_file, action='delete')
def falcon(type, action=None): import params Directory(params.falcon_pid_dir, owner=params.falcon_user) Directory(params.falcon_log_dir, owner=params.falcon_user) Directory(params.falcon_webapp_dir, owner=params.falcon_user) if type == 'client': if action == 'config': File(params.falcon_conf_dir + '/client.properties', content=Template('client.properties.j2'), mode=0644) elif type == 'server': if action == 'config': if params.store_uri[0:4] == "hdfs": params.HdfsDirectory(params.store_uri, action="create_delayed", owner=params.falcon_user, mode=0755) params.HdfsDirectory( params.flacon_apps_dir, action="create_delayed", owner=params.falcon_user, mode=0777 #TODO change to proper mode ) params.HdfsDirectory(None, action="create") Directory(params.falcon_local_dir, owner=params.falcon_user, recursive=True) Directory(params.falcon_data_dir, owner=params.falcon_user, recursive=True) PropertiesFile(params.falcon_conf_dir + '/runtime.properties', properties=params.falcon_runtime_properties, mode=0644) PropertiesFile(params.falcon_conf_dir + '/startup.properties', properties=params.falcon_startup_properties, mode=0644) if action == 'start': Execute(format( 'env JAVA_HOME={java_home} FALCON_LOG_DIR={falcon_log_dir} ' 'FALCON_PID_DIR=/var/run/falcon FALCON_DATA_DIR={falcon_data_dir} ' '{falcon_home}/bin/falcon-start -port {falcon_port}'), user=params.falcon_user) if action == 'stop': Execute(format( 'env JAVA_HOME={java_home} FALCON_LOG_DIR={falcon_log_dir} ' 'FALCON_PID_DIR=/var/run/falcon FALCON_DATA_DIR={falcon_data_dir} ' '{falcon_home}/bin/falcon-stop'), user=params.falcon_user) File(params.server_pid_file, action='delete')
def install_hive_exec_jar(self, params):
  hdfs_path_prefix = 'hdfs://'

  if params.tez_lib_uris:
    hdfs_path = params.hive_exec_hdfs_path

    if hdfs_path.strip().find(hdfs_path_prefix, 0) != -1:
      hdfs_path = hdfs_path.replace(hdfs_path_prefix, '')
    pass

    params.HdfsDirectory(hdfs_path,
                         action="create",
                         owner=params.hive_user,
                         mode=0755)

    if params.security_enabled:
      kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_user};")
    else:
      kinit_if_needed = ""

    if kinit_if_needed:
      Execute(kinit_if_needed, user=params.tez_user, path='/bin')

    CopyFromLocal(params.hive_exec_jar_path,
                  mode=0655,
                  owner=params.hive_user,
                  dest_dir=hdfs_path,
                  kinnit_if_needed=kinit_if_needed,
                  hdfs_user=params.hdfs_user)
  pass
def create_hdfs_directories(check):
  import params

  params.HdfsDirectory("/tmp",
                       action="create_delayed",
                       owner=params.hdfs_user,
                       mode=0777)
  params.HdfsDirectory(params.smoke_hdfs_user_dir,
                       action="create_delayed",
                       owner=params.smoke_user,
                       mode=params.smoke_hdfs_user_mode)
  params.HdfsDirectory(None,
                       action="create",
                       only_if=check  # skip creation when HA not active
  )
def start(self, env):
  import params
  self.configure(env)

  # Check HDFS set up
  # Must be in start section, since we need HDFS running
  params.HdfsDirectory("/hbase/archive",
                       action="create_on_execute",
                       owner=params.hbase_user,
                       group=params.hbase_user)
  params.HdfsDirectory(params.hbase_staging,
                       action="create_on_execute",
                       owner=params.hbase_user,
                       group=params.hbase_user)
  params.HdfsDirectory("/user/trafodion",
                       action="create_on_execute",
                       owner=params.traf_user,
                       group=params.traf_group,
                       mode=0755)
  params.HdfsDirectory("/user/trafodion/trafodion_backups",
                       action="create_on_execute",
                       owner=params.traf_user,
                       group=params.traf_group)
  params.HdfsDirectory("/user/trafodion/bulkload",
                       action="create_on_execute",
                       owner=params.traf_user,
                       group=params.user_group,
                       mode=0750)
  params.HdfsDirectory("/user/trafodion/lobs",
                       action="create_on_execute",
                       owner=params.traf_user,
                       group=params.traf_group)
  params.HdfsDirectory(None, action="execute")

  try:
    cmd = "hdfs dfs -setfacl -R -m user:%s:rwx,default:user:%s:rwx,mask::rwx /hbase/archive" % \
          (params.traf_user, params.traf_user)
    Execute(cmd, user=params.hdfs_user)
  except:
    print "Error: HDFS ACLs must be enabled for config of hdfs:/hbase/archive"
    print "       Re-start HDFS, HBase, and other affected components before starting Trafodion"
    raise Fail("Need HDFS component re-start")

  # Start trafodion
  Execute('source ~/.bashrc ; sqstart', user=params.traf_user, logoutput=True)
def oozie( is_server=False # TODO: see if see can remove this ): import params if is_server: params.HdfsDirectory(params.oozie_hdfs_user_dir, action="create", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) #TODO hack for falcon el oozie_site = dict(params.config['configurations']['oozie-site']) oozie_site[ "oozie.services.ext"] = 'org.apache.oozie.service.JMSAccessorService,' + oozie_site[ "oozie.services.ext"] XmlConfig("oozie-site.xml", conf_dir=params.conf_dir, configurations=oozie_site, owner=params.oozie_user, group=params.user_group, mode=0664) Directory(params.conf_dir, owner=params.oozie_user, group=params.user_group) TemplateConfig(format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=params.log4j_props) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) environment = {"no_proxy": format("{ambari_server_hostname}")} if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": Execute(format("/bin/sh -c 'cd /usr/lib/ambari-agent/ &&\ curl -kf \ --retry 5 {jdk_location}{check_db_connection_jar_name}\ -o {check_db_connection_jar_name}'"), not_if=format("[ -f {check_db_connection_jar} ]"), environment=environment) oozie_ownership() if is_server: oozie_server_specific()
def oozie(is_server=False): import params if is_server: params.HdfsDirectory(params.oozie_hdfs_user_dir, action="create", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) XmlConfig( "oozie-site.xml", conf_dir=params.conf_dir, configurations=params.config['configurations']['oozie-site'], configuration_attributes=params.config['configuration_attributes'] ['oozie-site'], owner=params.oozie_user, group=params.user_group, mode=0664) Directory(params.conf_dir, owner=params.oozie_user, group=params.user_group) File(format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user, content=InlineTemplate(params.oozie_env_sh_template)) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=params.log4j_props) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) environment = {"no_proxy": format("{ambari_server_hostname}")} if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": Execute(format("/bin/sh -c 'cd /usr/lib/ambari-agent/ &&\ curl -kf -x \"\" \ --retry 5 {jdk_location}{check_db_connection_jar_name}\ -o {check_db_connection_jar_name}'"), not_if=format("[ -f {check_db_connection_jar} ]"), environment=environment) oozie_ownership() if is_server: oozie_server_specific()
def mapreduce(name=None):
  import params

  if name in ["jobtracker", "historyserver"]:
    params.HdfsDirectory("/mapred", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/system", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/history", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(None, action="create")

  Directory(params.mapred_pid_dir,
            owner=params.mapred_user,
            group=params.user_group,
            recursive=True)

  mapred_log_dir = os.path.join(params.mapred_log_dir_prefix, params.mapred_user)
  Directory(mapred_log_dir,
            recursive=True,
            owner=params.mapred_user,
            group=params.user_group)

  if name == 'jobtracker':
    File(os.path.join(mapred_log_dir, 'hadoop-mapreduce.jobsummary.log'),
         owner=params.mapred_user,
         group=params.user_group,
         mode=0664)

  Directory(params.mapred_local_dir.split(','),
            owner=params.mapred_user,
            mode=0755,
            recursive=True,
            ignore_failures=True)

  File(params.exclude_file_path,
       owner=params.mapred_user,
       group=params.user_group)
  File(params.mapred_hosts_file_path,
       owner=params.mapred_user,
       group=params.user_group)
def setup_spark(env, type, action=None): import params env.set_params(params) Directory([params.spark_pid_dir, params.spark_log_dir], owner=params.spark_user, group=params.user_group, recursive=True) if type == 'server': if action == 'start' or action == 'config': params.HdfsDirectory(params.spark_hdfs_user_dir, action="create", owner=params.spark_user, mode=0775) file_path = params.spark_conf + '/spark-defaults.conf' create_file(file_path) write_properties_to_file(file_path, spark_properties(params)) # create spark-env.sh in etc/conf dir File(os.path.join(params.spark_conf, 'spark-env.sh'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_env_sh)) #create log4j.properties in etc/conf dir File(os.path.join(params.spark_conf, 'log4j.properties'), owner=params.spark_user, group=params.spark_group, content=params.spark_log4j_properties) #create metrics.properties in etc/conf dir File(os.path.join(params.spark_conf, 'metrics.properties'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_metrics_properties)) File(os.path.join(params.spark_conf, 'java-opts'), owner=params.spark_user, group=params.spark_group, content=params.spark_javaopts_properties) if params.is_hive_installed: hive_config = get_hive_config() XmlConfig("hive-site.xml", conf_dir=params.spark_conf, configurations=hive_config, owner=params.spark_user, group=params.spark_group, mode=0644)
def setup_custom_scratchdir():
  import params

  # If this property is custom and not a variation of the writable temp dir
  if not is_empty(params.hive_exec_scratchdir) and \
     not params.hive_exec_scratchdir.startswith("/tmp"):
    params.HdfsDirectory(params.hive_exec_scratchdir,
                         action="create_delayed",
                         owner=params.hive_user,
                         group=params.hdfs_user,
                         mode=0777)  # Hive expects this dir to be writeable by everyone as it is used as a temp dir
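# Illustration only (values below are hypothetical): setup_custom_scratchdir() queues the
# directory only when hive.exec.scratchdir is non-empty and does not live under /tmp, e.g.
#   params.hive_exec_scratchdir = "/tmp/hive"          -> skipped (default-style temp dir)
#   params.hive_exec_scratchdir = "/apps/hive/scratch" -> queued with mode 0777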
def setup_spark(env, type, action=None): import params Directory([params.spark_pid_dir, params.spark_log_dir], owner=params.spark_user, group=params.user_group, recursive=True) if type == 'server' and action == 'config': params.HdfsDirectory(params.spark_hdfs_user_dir, action="create", owner=params.spark_user, mode=0775) PropertiesFile( format("{spark_conf}/spark-defaults.conf"), properties=params.config['configurations']['spark-defaults'], key_value_delimiter=" ", ) # create spark-env.sh in etc/conf dir File(os.path.join(params.spark_conf, 'spark-env.sh'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_env_sh)) #create log4j.properties in etc/conf dir File(os.path.join(params.spark_conf, 'log4j.properties'), owner=params.spark_user, group=params.spark_group, content=params.spark_log4j_properties) #create metrics.properties in etc/conf dir File(os.path.join(params.spark_conf, 'metrics.properties'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_metrics_properties)) File(os.path.join(params.spark_conf, 'java-opts'), owner=params.spark_user, group=params.spark_group, content=params.spark_javaopts_properties) if params.is_hive_installed: XmlConfig("hive-site.xml", conf_dir=params.spark_conf, configurations=params.spark_hive_properties, owner=params.spark_user, group=params.spark_group, mode=0644)
def _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed):
  """
  :param source_and_dest_pairs: List of tuples (x, y), where x is the source file in the local file system,
  and y is the destination file path in HDFS
  :param file_owner: Owner to set for the file copied to HDFS (typically hdfs account)
  :param group_owner: Owning group to set for the file copied to HDFS (typically hadoop group)
  :param kinit_if_needed: kinit command if it is needed, otherwise an empty string
  :return: Returns 0 if at least one file was copied and no exceptions occurred, and 1 otherwise.

  Must kinit before calling this function.
  """
  import params

  return_value = 1
  if source_and_dest_pairs and len(source_and_dest_pairs) > 0:
    return_value = 0
    for (source, destination) in source_and_dest_pairs:
      try:
        destination_dir = os.path.dirname(destination)

        params.HdfsDirectory(destination_dir,
                             action="create",
                             owner=file_owner,
                             mode=0555)

        CopyFromLocal(source,
                      mode=0444,
                      owner=file_owner,
                      group=group_owner,
                      dest_dir=destination_dir,
                      kinnit_if_needed=kinit_if_needed,
                      hdfs_user=params.hdfs_user,
                      hadoop_bin_dir=params.hadoop_bin_dir,
                      hadoop_conf_dir=params.hadoop_conf_dir)
      except:
        return_value = 1
  return return_value
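# A possible call site for _copy_files() above, shown only as a sketch; the tarball paths
# are hypothetical and the owner/group values would normally come from params:
def copy_tarballs_example(kinit_if_needed):
  import params
  pairs = [
    ("/usr/lib/tez/tez.tar.gz", "/apps/tez/tez.tar.gz"),  # (local source, HDFS destination)
    ("/usr/lib/pig/pig.tar.gz", "/apps/pig/pig.tar.gz"),
  ]
  # Returns 0 on success, 1 if any copy failed (per the docstring above).
  return _copy_files(pairs, params.hdfs_user, params.user_group, kinit_if_needed)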
def mapreduce(name=None):
  import params

  if name in ["jobtracker", "historyserver"]:
    params.HdfsDirectory("/mapred", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/system", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/history", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(None, action="create")

  Directory([params.mapred_pid_dir, params.mapred_log_dir],
            owner=params.mapred_user,
            group=params.user_group,
            recursive=True)

  Directory(params.mapred_local_dir.split(','),
            owner=params.mapred_user,
            mode=0755,
            recursive=True)

  File(params.exclude_file_path,
       owner=params.mapred_user,
       group=params.user_group)
  File(params.mapred_hosts_file_path,
       owner=params.mapred_user,
       group=params.user_group)
def oozie(is_server=False): import params if is_server: params.HdfsDirectory(params.oozie_hdfs_user_dir, action="create", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) Directory(params.conf_dir, recursive=True, owner=params.oozie_user, group=params.user_group) XmlConfig( "oozie-site.xml", conf_dir=params.conf_dir, configurations=params.oozie_site, configuration_attributes=params.config['configuration_attributes'] ['oozie-site'], owner=params.oozie_user, group=params.user_group, mode=0664) File(format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user, content=InlineTemplate(params.oozie_env_sh_template)) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=params.log4j_props) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) if params.hdp_stack_version != "" and compare_versions( params.hdp_stack_version, '2.2') >= 0: File(format("{params.conf_dir}/adminusers.txt"), mode=0644, group=params.user_group, owner=params.oozie_user, content=Template('adminusers.txt.j2', oozie_user=params.oozie_user)) else: File(format("{params.conf_dir}/adminusers.txt"), owner=params.oozie_user, group=params.user_group) if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": File( format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content=DownloadSource( format("{jdk_location}{check_db_connection_jar_name}")), ) pass oozie_ownership() if is_server: oozie_server_specific()
def yarn(name=None):
  import params

  if name in ["nodemanager", "historyserver"]:
    if params.yarn_log_aggregation_enabled:
      params.HdfsDirectory(params.yarn_nm_app_log_dir,
                           action="create_delayed",
                           owner=params.yarn_user,
                           group=params.user_group,
                           mode=0777,
                           recursive_chmod=True)
    params.HdfsDirectory("/mapred", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/system", action="create_delayed", owner=params.hdfs_user)
    params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=01777)
    params.HdfsDirectory(None, action="create")

  if name == "nodemanager":
    Directory(params.nm_local_dirs.split(',') + params.nm_log_dirs.split(','),
              owner=params.yarn_user,
              recursive=True,
              ignore_failures=True)

  Directory([params.yarn_pid_dir, params.yarn_log_dir],
            owner=params.yarn_user,
            group=params.user_group,
            recursive=True)
  Directory([params.mapred_pid_dir, params.mapred_log_dir],
            owner=params.mapred_user,
            group=params.user_group,
            recursive=True)
  Directory([params.yarn_log_dir_prefix],
            owner=params.yarn_user,
            recursive=True,
            ignore_failures=True)

  XmlConfig("core-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644)
  XmlConfig("mapred-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)
  XmlConfig("yarn-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)
  XmlConfig("capacity-scheduler.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['capacity-scheduler'],
            configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  if name == 'resourcemanager':
    File(params.yarn_job_summary_log,
         owner=params.yarn_user,
         group=params.user_group)
  elif name == 'apptimelineserver':
    Directory(params.ats_leveldb_dir,
              owner=params.yarn_user,
              group=params.user_group,
              recursive=True)

  File(params.rm_nodes_exclude_path,
       owner=params.yarn_user,
       group=params.user_group)
  File(format("{limits_conf_dir}/yarn.conf"),
       mode=0644,
       content=Template('yarn.conf.j2'))
  File(format("{limits_conf_dir}/mapreduce.conf"),
       mode=0644,
       content=Template('mapreduce.conf.j2'))
  File(format("{config_dir}/yarn-env.sh"),
       owner=params.yarn_user,
       group=params.user_group,
       mode=0755,
       content=InlineTemplate(params.yarn_env_sh_template))

  if params.security_enabled:
    container_executor = format("{yarn_container_bin}/container-executor")
    File(container_executor,
         group=params.yarn_executor_container_group,
         mode=06050)
    File(format("{config_dir}/container-executor.cfg"),
         group=params.user_group,
         mode=0644,
         content=Template('container-executor.cfg.j2'))

  if params.security_enabled:
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  File(format("{config_dir}/mapred-env.sh"),
       owner=tc_owner,
       content=InlineTemplate(params.mapred_env_sh_template))

  if params.security_enabled:
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050)
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         mode=tc_mode,
         group=params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2"))
  else:
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         content=Template("taskcontroller.cfg.j2"))

  if "mapred-site" in params.config['configurations']:
    XmlConfig("mapred-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['mapred-site'],
              configuration_attributes=params.config['configuration_attributes']['mapred-site'],
              owner=params.mapred_user,
              group=params.user_group)

  if "capacity-scheduler" in params.config['configurations']:
    XmlConfig("capacity-scheduler.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['capacity-scheduler'],
              configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
              owner=params.hdfs_user,
              group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml')):
    File(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)
def hbase( name=None # 'master' or 'regionserver' or 'client' ): import params Directory(params.hbase_conf_dir, owner=params.hbase_user, group=params.user_group, recursive=True) Directory(params.tmp_dir, owner=params.hbase_user, recursive=True) Directory(os.path.join(params.local_dir, "jars"), owner=params.hbase_user, group=params.user_group, mode=0775, recursive=True) XmlConfig("hbase-site.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['hbase-site'], owner=params.hbase_user, group=params.user_group) XmlConfig("hdfs-site.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['hdfs-site'], owner=params.hbase_user, group=params.user_group) XmlConfig("hdfs-site.xml", conf_dir=params.hadoop_conf_dir, configurations=params.config['configurations']['hdfs-site'], owner=params.hdfs_user, group=params.user_group) if 'hbase-policy' in params.config['configurations']: XmlConfig( "hbase-policy.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['hbase-policy'], owner=params.hbase_user, group=params.user_group) # Manually overriding ownership of file installed by hadoop package else: File(format("{params.hbase_conf_dir}/hbase-policy.xml"), owner=params.hbase_user, group=params.user_group) hbase_TemplateConfig('hbase-env.sh') hbase_TemplateConfig( params.metric_prop_file_name, tag='GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS') hbase_TemplateConfig('regionservers') if params.security_enabled: hbase_TemplateConfig(format("hbase_{name}_jaas.conf")) if name != "client": Directory(params.pid_dir, owner=params.hbase_user, recursive=True) Directory(params.log_dir, owner=params.hbase_user, recursive=True) if (params.log4j_props != None): File(format("{params.hbase_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hbase_user, content=params.log4j_props) elif (os.path.exists(format("{params.hbase_conf_dir}/log4j.properties"))): File(format("{params.hbase_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hbase_user) if name in ["master", "regionserver"]: params.HdfsDirectory(params.hbase_hdfs_root_dir, action="create_delayed", owner=params.hbase_user) params.HdfsDirectory(params.hbase_staging_dir, action="create_delayed", owner=params.hbase_user, mode=0711) params.HdfsDirectory(None, action="create")
def mapreduce(name=None):
  import params

  if name in ["jobtracker", "historyserver"]:
    params.HdfsDirectory("/mapred", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/system", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory("/mapred/history", action="create_delayed", owner=params.mapred_user)
    params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777)
    params.HdfsDirectory(None, action="create")

  Directory(params.mapred_pid_dir,
            owner=params.mapred_user,
            group=params.user_group,
            recursive=True)

  mapred_log_dir = os.path.join(params.mapred_log_dir_prefix, params.mapred_user)
  mapred_userlogs_dir = os.path.join(mapred_log_dir, "userlogs")

  Directory(mapred_log_dir,
            recursive=True,
            owner=params.mapred_user,
            group=params.user_group)
  Directory(mapred_userlogs_dir,
            recursive=True,
            mode=01777)

  if name == 'jobtracker':
    File(os.path.join(mapred_log_dir, 'hadoop-mapreduce.jobsummary.log'),
         owner=params.mapred_user,
         group=params.user_group,
         mode=0664)

  Directory(params.mapred_local_dir.split(','),
            owner=params.mapred_user,
            mode=0755,
            recursive=True,
            ignore_failures=True)

  File(params.exclude_file_path,
       owner=params.mapred_user,
       group=params.user_group)
  File(params.mapred_hosts_file_path,
       owner=params.mapred_user,
       group=params.user_group)

  if params.security_enabled:
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  if params.security_enabled:
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050)
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         mode=tc_mode,
         group=params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2"))
  else:
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         content=Template("taskcontroller.cfg.j2"))

  if "capacity-scheduler" in params.config['configurations']:
    XmlConfig("capacity-scheduler.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['capacity-scheduler'],
              owner=params.hdfs_user,
              group=params.user_group)

  if "mapred-queue-acls" in params.config['configurations']:
    XmlConfig("mapred-queue-acls.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['mapred-queue-acls'],
              owner=params.mapred_user,
              group=params.user_group)
  elif os.path.exists(os.path.join(params.hadoop_conf_dir, "mapred-queue-acls.xml")):
    File(os.path.join(params.hadoop_conf_dir, "mapred-queue-acls.xml"),
         owner=params.mapred_user,
         group=params.user_group)

  if "mapred-site" in params.config['configurations']:
    XmlConfig("mapred-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['mapred-site'],
              owner=params.mapred_user,
              group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml')):
    File(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)
def yarn(name=None): import params if name in ["nodemanager", "historyserver"]: if params.yarn_log_aggregation_enabled: params.HdfsDirectory(params.yarn_nm_app_log_dir, action="create_delayed", owner=params.yarn_user, group=params.user_group, mode=0777, recursive_chmod=True) params.HdfsDirectory("/mapred", action="create_delayed", owner=params.mapred_user) params.HdfsDirectory("/mapred/system", action="create_delayed", owner=params.hdfs_user) params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir, action="create_delayed", owner=params.mapred_user, group=params.user_group, mode=0777) params.HdfsDirectory(params.mapreduce_jobhistory_done_dir, action="create_delayed", owner=params.mapred_user, group=params.user_group, mode=01777) params.HdfsDirectory(None, action="create") if name == "nodemanager": Directory(params.nm_local_dirs.split(','), owner=params.yarn_user, create_parents=True) Directory(params.nm_log_dirs.split(','), owner=params.yarn_user, create_parents=True) Directory([params.yarn_pid_dir, params.yarn_log_dir], owner=params.yarn_user, group=params.user_group, create_parents=True) Directory([params.mapred_pid_dir, params.mapred_log_dir], owner=params.mapred_user, group=params.user_group, create_parents=True) Directory(params.yarn_log_dir_prefix, owner=params.yarn_user, create_parents=True) XmlConfig("core-site.xml", conf_dir=params.config_dir, configurations=params.config['configurations']['core-site'], configuration_attributes=params.config['configurationAttributes'] ['core-site'], owner=params.hdfs_user, group=params.user_group, mode=0644) XmlConfig("mapred-site.xml", conf_dir=params.config_dir, configurations=params.config['configurations']['mapred-site'], configuration_attributes=params.config['configurationAttributes'] ['mapred-site'], owner=params.yarn_user, group=params.user_group, mode=0644) XmlConfig("yarn-site.xml", conf_dir=params.config_dir, configurations=params.config['configurations']['yarn-site'], configuration_attributes=params.config['configurationAttributes'] ['yarn-site'], owner=params.yarn_user, group=params.user_group, mode=0644) XmlConfig( "capacity-scheduler.xml", conf_dir=params.config_dir, configurations=params.config['configurations']['capacity-scheduler'], configuration_attributes=params.config['configurationAttributes'] ['capacity-scheduler'], owner=params.yarn_user, group=params.user_group, mode=0644) if name == 'resourcemanager': File(params.yarn_job_summary_log, owner=params.yarn_user, group=params.user_group) File(params.rm_nodes_exclude_path, owner=params.yarn_user, group=params.user_group) File(format("{limits_conf_dir}/yarn.conf"), mode=0644, content=Template('yarn.conf.j2')) File(format("{limits_conf_dir}/mapreduce.conf"), mode=0644, content=Template('mapreduce.conf.j2')) File(format("{config_dir}/yarn-env.sh"), owner=params.yarn_user, group=params.user_group, mode=0755, content=Template('yarn-env.sh.j2')) if params.security_enabled: container_executor = format("{yarn_container_bin}/container-executor") File(container_executor, group=params.yarn_executor_container_group, mode=06050) File(format("{config_dir}/container-executor.cfg"), group=params.user_group, mode=0644, content=Template('container-executor.cfg.j2'))
def hbase( name=None # 'master' or 'regionserver' or 'client' ): import params Directory(params.hbase_conf_dir, owner=params.hbase_user, group=params.user_group, recursive=True) Directory(params.hbase_tmp_dir, owner=params.hbase_user, cd_access="a", recursive=True) Directory(os.path.join(params.local_dir, "jars"), owner=params.hbase_user, group=params.user_group, cd_access="a", mode=0775, recursive=True) merged_ams_hbase_site = {} merged_ams_hbase_site.update( params.config['configurations']['ams-hbase-site']) if params.security_enabled: merged_ams_hbase_site.update( params.config['configurations']['ams-hbase-security-site']) XmlConfig( "hbase-site.xml", conf_dir=params.hbase_conf_dir, configurations=merged_ams_hbase_site, configuration_attributes=params.config['configuration_attributes'] ['ams-hbase-site'], owner=params.hbase_user, group=params.user_group) # Phoenix spool file dir if not /tmp if not os.path.exists(params.phoenix_server_spool_dir): Directory(params.phoenix_server_spool_dir, owner=params.ams_user, mode=0755, group=params.user_group, cd_access="a", recursive=True) pass if 'ams-hbase-policy' in params.config['configurations']: XmlConfig( "hbase-policy.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['ams-hbase-policy'], configuration_attributes=params.config['configuration_attributes'] ['ams-hbase-policy'], owner=params.hbase_user, group=params.user_group) # Manually overriding ownership of file installed by hadoop package else: File(format("{params.hbase_conf_dir}/hbase-policy.xml"), owner=params.hbase_user, group=params.user_group) File(format("{hbase_conf_dir}/hbase-env.sh"), owner=params.hbase_user, content=InlineTemplate(params.hbase_env_sh_template)) # Metrics properties File(os.path.join(params.hbase_conf_dir, "hadoop-metrics2-hbase.properties"), owner=params.hbase_user, group=params.user_group, content=Template("hadoop-metrics2-hbase.properties.j2")) # hbase_TemplateConfig( params.metric_prop_file_name, # tag = 'GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS' # ) hbase_TemplateConfig('regionservers', user=params.hbase_user) if params.security_enabled: hbase_TemplateConfig(format("hbase_{name}_jaas.conf"), user=params.hbase_user) hbase_TemplateConfig(format("hbase_client_jaas.conf"), user=params.hbase_user) hbase_TemplateConfig(format("ams_zookeeper_jaas.conf"), user=params.hbase_user) if name in ["master", "regionserver"]: if params.is_hbase_distributed: params.HdfsDirectory(params.hbase_root_dir, action="create_delayed", owner=params.hbase_user, mode=0775) params.HdfsDirectory(params.hbase_staging_dir, action="create_delayed", owner=params.hbase_user, mode=0711) params.HdfsDirectory(None, action="create") else: local_root_dir = params.hbase_root_dir #cut protocol name if local_root_dir.startswith("file://"): local_root_dir = local_root_dir[7:] #otherwise assume dir name is provided as is Directory(local_root_dir, owner=params.hbase_user, cd_access="a", recursive=True) if name != "client": Directory(params.hbase_pid_dir, owner=params.hbase_user, recursive=True) Directory(params.hbase_log_dir, owner=params.hbase_user, recursive=True) if params.hbase_log4j_props is not None: File(format("{params.hbase_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hbase_user, content=params.hbase_log4j_props) elif os.path.exists(format("{params.hbase_conf_dir}/log4j.properties")): File(format("{params.hbase_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hbase_user)
def accumulo( name=None # 'master' or 'tserver' or 'client' ): import params Directory(params.accumulo_conf_dir, owner=params.accumulo_user, recursive=True) XmlConfig( "accumulo-site.xml", conf_dir=params.accumulo_conf_dir, configurations=params.config['configurations']['accumulo-site'], configuration_attributes=params.config['configuration_attributes'] ['accumulo-site'], owner=params.accumulo_user, mode=0600) XmlConfig( "hdfs-site.xml", conf_dir=params.hadoop_conf_dir, configurations=params.config['configurations']['hdfs-site'], configuration_attributes=params.config['configuration_attributes'] ['hdfs-site'], owner=params.hdfs_user, ) if 'accumulo-policy' in params.config['configurations']: XmlConfig( "accumulo-policy.xml", conf_dir=params.accumulo_conf_dir, configurations=params.config['configurations']['accumulo-policy'], configuration_attributes=params.config['configuration_attributes'] ['accumulo-policy'], owner=params.accumulo_user, ) else: File( format("{params.accumulo_conf_dir}/accumulo-policy.xml"), owner=params.accumulo_user, ) Directory(params.log_dir, owner=params.accumulo_user, recursive=True) if (params.log4j_props != None): File(format("{params.accumulo_conf_dir}/log4j.properties"), mode=0644, owner=params.accumulo_user, content=params.log4j_props) elif (os.path.exists( format("{params.accumulo_conf_dir}/log4j.properties"))): File(format("{params.accumulo_conf_dir}/log4j.properties"), mode=0644, owner=params.accumulo_user) if name in ["master", "tserver"]: params.HdfsDirectory( format("{params.accumulo_hdfs_root_dir}"), action="create_delayed", owner=params.accumulo_user, ) params.HdfsDirectory(format("{params.accumulo_hdfs_stage_dir}"), action="create_delayed", owner=params.accumulo_user, mode=0751) params.HdfsDirectory(None, action="create") accumulo_StaticFile("auditLog.xml") accumulo_StaticFile("generic_logger.xml") accumulo_StaticFile("monitor_logger.xml") accumulo_StaticFile("accumulo-metrics.xml") accumulo_StaticFile("tracers") accumulo_StaticFile("gc") accumulo_StaticFile("monitor") accumulo_StaticFile('slaves') accumulo_StaticFile('masters') accumulo_TemplateConfig('accumulo-env.sh')
def hive(name=None): import params if name == "hiveserver2": params.HdfsDirectory(params.hive_apps_whs_dir, action="create_delayed", owner=params.hive_user, mode=0777) params.HdfsDirectory(params.hive_hdfs_user_dir, action="create_delayed", owner=params.hive_user, mode=params.hive_hdfs_user_mode) params.HdfsDirectory(None, action="create") if name == 'metastore' or name == 'hiveserver2': hive_config_dir = params.hive_server_conf_dir config_file_mode = 0600 jdbc_connector() else: hive_config_dir = params.hive_conf_dir config_file_mode = 0644 Directory(hive_config_dir, owner=params.hive_user, group=params.user_group, recursive=True) XmlConfig("hive-site.xml", conf_dir=hive_config_dir, configurations=params.config['configurations']['hive-site'], owner=params.hive_user, group=params.user_group, mode=config_file_mode) cmd = format( "/bin/sh -c 'cd /usr/lib/ambari-agent/ && curl -kf --retry 5 " "{jdk_location}{check_db_connection_jar_name} -o {check_db_connection_jar_name}'" ) Execute(cmd, not_if=format("[ -f {check_db_connection_jar_name}]")) if name == 'metastore': File(params.start_metastore_path, mode=0755, content=StaticFile('startMetastore.sh')) elif name == 'hiveserver2': File(params.start_hiveserver2_path, mode=0755, content=StaticFile('startHiveserver2.sh')) if name != "client": crt_directory(params.hive_pid_dir) crt_directory(params.hive_log_dir) crt_directory(params.hive_var_lib) File(format("{hive_config_dir}/hive-env.sh"), owner=params.hive_user, group=params.user_group, content=Template('hive-env.sh.j2', conf_dir=hive_config_dir)) crt_file(format("{hive_conf_dir}/hive-default.xml.template")) crt_file(format("{hive_conf_dir}/hive-env.sh.template")) log4j_exec_filename = 'hive-exec-log4j.properties' if (params.log4j_exec_props != None): File(format("{params.hive_conf_dir}/{log4j_exec_filename}"), mode=0644, group=params.user_group, owner=params.hive_user, content=params.log4j_exec_props) elif (os.path.exists( "{params.hive_conf_dir}/{log4j_exec_filename}.template")): File(format("{params.hive_conf_dir}/{log4j_exec_filename}"), mode=0644, group=params.user_group, owner=params.hive_user, content=StaticFile( format( "{params.hive_conf_dir}/{log4j_exec_filename}.template"))) log4j_filename = 'hive-log4j.properties' if (params.log4j_props != None): File(format("{params.hive_conf_dir}/{log4j_filename}"), mode=0644, group=params.user_group, owner=params.hive_user, content=params.log4j_props) elif (os.path.exists("{params.hive_conf_dir}/{log4j_filename}.template")): File(format("{params.hive_conf_dir}/{log4j_filename}"), mode=0644, group=params.user_group, owner=params.hive_user, content=StaticFile( format("{params.hive_conf_dir}/{log4j_filename}.template")))
def scdf(name=None): import params if name == "server": params.HdfsDirectory(params.deployer_dir, action=params.action_create_delayed, owner=params.scdf_user, mode=0777) params.HdfsDirectory(params.scdf_hdfs_user_dir, action=params.action_create_delayed, owner=params.scdf_user, mode=0777) params.HdfsDirectory(None, action=params.action_create) try: Directory(params.log_dir, owner=params.scdf_user, group=params.user_group, mode=0775, recursive=True) except Fail: Directory(params.log_dir, owner=params.scdf_user, group=params.user_group, mode=0775, create_parents=True) try: Directory([params.pid_dir, params.data_dir, params.conf_dir], owner=params.scdf_user, group=params.user_group, recursive=True) except Fail: Directory([params.pid_dir, params.data_dir, params.conf_dir], owner=params.scdf_user, group=params.user_group, create_parents=True) dfs_ha_map = {} if params.dfs_ha_enabled: for nn_id in params.dfs_ha_namemodes_ids_list: nn_host = params.config['configurations']['hdfs-site'][format( 'dfs.namenode.rpc-address.{dfs_ha_nameservices}.{nn_id}')] dfs_ha_map[nn_id] = nn_host configurations = params.config['configurations']['scdf-site'] sec_filtered_map = {} for key, value in configurations.iteritems(): if "security" in value: sec_filtered_map[key] = value File(format("{conf_dir}/servers.yml"), content=Template("servers.yml.j2", extra_imports=[escape_yaml_property], dfs_ha_map=dfs_ha_map, configurations=configurations), owner=params.scdf_user, group=params.user_group) File(format("{conf_dir}/scdf_kafka_jaas.conf"), content=Template("scdf_kafka_jaas.conf.j2", configurations=configurations), owner=params.scdf_user, group=params.user_group) File(format("{conf_dir}/scdf-shell.init"), content=Template("scdf-shell.init.j2", dfs_ha_map=dfs_ha_map), owner=params.scdf_user, group=params.user_group) File(format("{conf_dir}/hadoop.properties"), content=Template("hadoop.properties.j2", dfs_ha_map=dfs_ha_map, sec_filtered_map=sec_filtered_map), owner=params.scdf_user, group=params.user_group) File(format("{conf_dir}/scdf-server-env.sh"), owner=params.scdf_user, content=InlineTemplate(params.scdf_server_env_sh_template)) File(format("{conf_dir}/scdf-shell-env.sh"), owner=params.scdf_user, content=InlineTemplate(params.scdf_shell_env_sh_template))
def _copy_files(source_and_dest_pairs, component_user, file_owner, group_owner, kinit_if_needed):
  """
  :param source_and_dest_pairs: List of tuples (x, y), where x is the source file in the local file system,
  and y is the destination file path in HDFS
  :param component_user: User that will execute the Hadoop commands, usually smokeuser
  :param file_owner: Owner to set for the file copied to HDFS (typically hdfs account)
  :param group_owner: Owning group to set for the file copied to HDFS (typically hadoop group)
  :param kinit_if_needed: kinit command if it is needed, otherwise an empty string
  :return: Returns 0 if at least one file was copied and no exceptions occurred, and 1 otherwise.

  Must kinit before calling this function.
  """
  import params

  return_value = 1
  if source_and_dest_pairs and len(source_and_dest_pairs) > 0:
    return_value = 0
    for (source, destination) in source_and_dest_pairs:
      try:
        destination_dir = os.path.dirname(destination)

        params.HdfsDirectory(destination_dir,
                             action="create",
                             owner=file_owner,
                             hdfs_user=params.hdfs_user,  # this will be the user to run the commands as
                             mode=0555)

        # Because CopyFromLocal does not guarantee synchronization, it's possible for two processes to first attempt to
        # copy the file to a temporary location, then process 2 fails because the temporary file was already created by
        # process 1, so process 2 tries to clean up by deleting the temporary file, and then process 1
        # cannot finish the copy to the final destination, and both fail!
        # For this reason, the file name on the destination must be unique, and we then rename it to the intended value.
        # The rename operation is synchronized by the Namenode.
        orig_dest_file_name = os.path.split(destination)[1]
        unique_string = str(uuid.uuid4())[:8]
        new_dest_file_name = orig_dest_file_name + "." + unique_string
        new_destination = os.path.join(destination_dir, new_dest_file_name)

        CopyFromLocal(source,
                      mode=0444,
                      owner=file_owner,
                      group=group_owner,
                      user=params.hdfs_user,  # this will be the user to run the commands as
                      dest_dir=destination_dir,
                      dest_file=new_dest_file_name,
                      kinnit_if_needed=kinit_if_needed,
                      hdfs_user=params.hdfs_user,
                      hadoop_bin_dir=params.hadoop_bin_dir,
                      hadoop_conf_dir=params.hadoop_conf_dir)

        mv_command = format("fs -mv {new_destination} {destination}")
        ExecuteHadoop(mv_command,
                      user=params.hdfs_user,
                      bin_dir=params.hadoop_bin_dir,
                      conf_dir=params.hadoop_conf_dir)
      except Exception, e:
        Logger.error("Failed to copy file. Source: %s, Destination: %s. Error: %s" % (source, destination, e.message))
        return_value = 1
  return return_value
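# The comment block above describes a copy-then-rename trick to avoid races between
# concurrent copies. A minimal standalone sketch of just the name handling (stdlib only;
# the destination path in the usage note is hypothetical):
import os
import uuid

def unique_then_final(destination):
  destination_dir = os.path.dirname(destination)
  orig_dest_file_name = os.path.split(destination)[1]
  unique_string = str(uuid.uuid4())[:8]
  new_dest_file_name = orig_dest_file_name + "." + unique_string
  # Copy to this unique name first, then "fs -mv" it onto the final name;
  # the rename is serialized by the NameNode, so only one writer wins cleanly.
  return os.path.join(destination_dir, new_dest_file_name)

# e.g. unique_then_final("/apps/tez/tez.tar.gz") -> "/apps/tez/tez.tar.gz.3f9c1a2b" (suffix varies)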
def setup_conf_dir(name=None):  # 'master' or 'tserver' or 'monitor' or 'gc' or 'tracer' or 'client'
  import params

  # create the conf directory
  Directory(params.conf_dir,
            mode=0755,
            owner=params.accumulo_user,
            group=params.user_group,
            recursive=True)

  if name == 'client':
    dest_conf_dir = params.conf_dir

    # create a site file for client processes
    configs = {}
    configs.update(params.config['configurations']['accumulo-site'])
    if "instance.secret" in configs:
      configs.pop("instance.secret")
    if "trace.token.property.password" in configs:
      configs.pop("trace.token.property.password")
    XmlConfig("accumulo-site.xml",
              conf_dir=dest_conf_dir,
              configurations=configs,
              configuration_attributes=params.config['configuration_attributes']['accumulo-site'],
              owner=params.accumulo_user,
              group=params.user_group,
              mode=0644)

    # create env file
    File(format("{dest_conf_dir}/accumulo-env.sh"),
         mode=0644,
         group=params.user_group,
         owner=params.accumulo_user,
         content=InlineTemplate(params.env_sh_template))
  else:
    dest_conf_dir = params.server_conf_dir

    # create server conf directory
    Directory(params.server_conf_dir,
              mode=0700,
              owner=params.accumulo_user,
              group=params.user_group,
              recursive=True)

    # create a site file for server processes
    configs = {}
    configs.update(params.config['configurations']['accumulo-site'])
    configs["instance.secret"] = str(params.config['configurations']['accumulo-env']['instance_secret'])
    configs["trace.token.property.password"] = str(params.trace_password)
    XmlConfig("accumulo-site.xml",
              conf_dir=dest_conf_dir,
              configurations=configs,
              configuration_attributes=params.config['configuration_attributes']['accumulo-site'],
              owner=params.accumulo_user,
              group=params.user_group,
              mode=0600)

    # create pid dir
    Directory(params.pid_dir,
              owner=params.accumulo_user,
              group=params.user_group,
              recursive=True)

    # create log dir
    Directory(params.log_dir,
              owner=params.accumulo_user,
              group=params.user_group,
              recursive=True)

    # create env file
    File(format("{dest_conf_dir}/accumulo-env.sh"),
         mode=0644,
         group=params.user_group,
         owner=params.accumulo_user,
         content=InlineTemplate(params.server_env_sh_template))

    # create client.conf file
    configs = {}
    configs["instance.name"] = params.instance_name
    configs["instance.zookeeper.host"] = params.config['configurations']['accumulo-site']['instance.zookeeper.host']
    if 'instance.rpc.sasl.enabled' in params.config['configurations']['accumulo-site']:
      configs["instance.rpc.sasl.enabled"] = params.config['configurations']['accumulo-site']['instance.rpc.sasl.enabled']
    PropertiesFile(format("{dest_conf_dir}/client.conf"),
                   properties=configs,
                   owner=params.accumulo_user,
                   group=params.user_group)

  # create log4j.properties files
  if (params.log4j_props != None):
    File(format("{params.conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.accumulo_user,
         content=params.log4j_props)
  else:
    File(format("{params.conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user)

  # create logging configuration files
  accumulo_TemplateConfig("auditLog.xml", dest_conf_dir)
  accumulo_TemplateConfig("generic_logger.xml", dest_conf_dir)
  accumulo_TemplateConfig("monitor_logger.xml", dest_conf_dir)
  accumulo_StaticFile("accumulo-metrics.xml", dest_conf_dir)

  # create host files
  accumulo_StaticFile("tracers", dest_conf_dir)
  accumulo_StaticFile("gc", dest_conf_dir)
  accumulo_StaticFile("monitor", dest_conf_dir)
  accumulo_StaticFile("slaves", dest_conf_dir)
  accumulo_StaticFile("masters", dest_conf_dir)

  # metrics configuration
  if params.has_metric_collector:
    accumulo_TemplateConfig("hadoop-metrics2-accumulo.properties", dest_conf_dir)

  # other server setup
  if name == 'master':
    params.HdfsDirectory(format("/user/{params.accumulo_user}"),
                         action="create_delayed",
                         owner=params.accumulo_user,
                         mode=0700)
    params.HdfsDirectory(format("{params.parent_dir}"),
                         action="create_delayed",
                         owner=params.accumulo_user,
                         mode=0700)
    params.HdfsDirectory(None, action="create")

    if params.security_enabled and params.has_secure_user_auth:
      Execute(format("{params.kinit_cmd} "
                     "{params.daemon_script} init "
                     "--user {params.accumulo_principal_name} "
                     "--instance-name {params.instance_name} "
                     "--clear-instance-name "
                     ">{params.log_dir}/accumulo-init.out "
                     "2>{params.log_dir}/accumulo-init.err"),
              not_if=as_user(format("{params.kinit_cmd} "
                                    "{params.hadoop_bin_dir}/hadoop --config "
                                    "{params.hadoop_conf_dir} fs -stat "
                                    "{params.instance_volumes}"),
                             params.accumulo_user),
              user=params.accumulo_user)
    else:
      passfile = format("{params.exec_tmp_dir}/pass")
      try:
        File(passfile,
             mode=0600,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate('{{root_password}}\n'
                                    '{{root_password}}\n'))
        Execute(format("cat {passfile} | {params.daemon_script} init "
                       "--instance-name {params.instance_name} "
                       "--clear-instance-name "
                       ">{params.log_dir}/accumulo-init.out "
                       "2>{params.log_dir}/accumulo-init.err"),
                not_if=as_user(format("{params.kinit_cmd} "
                                      "{params.hadoop_bin_dir}/hadoop --config "
                                      "{params.hadoop_conf_dir} fs -stat "
                                      "{params.instance_volumes}"),
                               params.accumulo_user),
                user=params.accumulo_user)
      finally:
        os.remove(passfile)

  if name == 'tracer':
    if params.security_enabled and params.has_secure_user_auth:
      Execute(format("{params.kinit_cmd} "
                     "{params.daemon_script} init --reset-security "
                     "--user {params.accumulo_principal_name} "
                     "--password NA "
                     ">{params.log_dir}/accumulo-reset.out "
                     "2>{params.log_dir}/accumulo-reset.err"),
              not_if=as_user(format("{params.kinit_cmd} "
                                    "{params.daemon_script} shell -e "
                                    "\"userpermissions -u "
                                    "{params.accumulo_principal_name}\" | "
                                    "grep System.CREATE_TABLE"),
                             params.accumulo_user),
              user=params.accumulo_user)
      create_user(params.smokeuser_principal, params.smoke_test_password)
    else:
      # do not try to reset security in nonsecure mode, for now
      # Execute( format("{params.daemon_script} init --reset-security "
      #                 "--user root "
      #                 ">{params.log_dir}/accumulo-reset.out "
      #                 "2>{params.log_dir}/accumulo-reset.err"),
      #          not_if=as_user(format("cat {rpassfile} | "
      #                                "{params.daemon_script} shell -e "
      #                                "\"userpermissions -u root\" | "
      #                                "grep System.CREATE_TABLE"),
      #                         params.accumulo_user),
      #          user=params.accumulo_user)
      create_user(params.smoke_test_user, params.smoke_test_password)
    create_user(params.trace_user, params.trace_password)

    rpassfile = format("{params.exec_tmp_dir}/pass0")
    cmdfile = format("{params.exec_tmp_dir}/resetcmds")
    try:
      File(cmdfile,
           mode=0600,
           group=params.user_group,
           owner=params.accumulo_user,
           content=InlineTemplate('grant -t trace -u {{trace_user}} Table.ALTER_TABLE\n'
                                  'grant -t trace -u {{trace_user}} Table.READ\n'
                                  'grant -t trace -u {{trace_user}} Table.WRITE\n'))
      if params.security_enabled and params.has_secure_user_auth:
        Execute(format("{params.kinit_cmd} {params.daemon_script} shell -f "
                       "{cmdfile}"),
                only_if=as_user(format("{params.kinit_cmd} "
                                       "{params.daemon_script} shell "
                                       "-e \"table trace\""),
                                params.accumulo_user),
                not_if=as_user(format("{params.kinit_cmd} "
                                      "{params.daemon_script} shell "
                                      "-e \"userpermissions -u "
                                      "{params.trace_user} | "
                                      "grep Table.READ | grep trace"),
                               params.accumulo_user),
                user=params.accumulo_user)
      else:
        File(rpassfile,
             mode=0600,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate('{{root_password}}\n'))
        Execute(format("cat {rpassfile} | {params.daemon_script} shell -f "
                       "{cmdfile} -u root"),
                only_if=as_user(format("cat {rpassfile} | "
                                       "{params.daemon_script} shell -u root "
                                       "-e \"table trace\""),
                                params.accumulo_user),
                not_if=as_user(format("cat {rpassfile} | "
                                      "{params.daemon_script} shell -u root "
                                      "-e \"userpermissions -u "
                                      "{params.trace_user} | "
                                      "grep Table.READ | grep trace"),
                               params.accumulo_user),
                user=params.accumulo_user)
    finally:
      try_remove(rpassfile)
      try_remove(cmdfile)
def falcon(type, action=None): import params if action == 'config': Directory(params.falcon_pid_dir, owner=params.falcon_user, recursive=True) Directory(params.falcon_log_dir, owner=params.falcon_user, recursive=True) Directory(params.falcon_webapp_dir, owner=params.falcon_user, recursive=True) Directory(params.falcon_home, owner=params.falcon_user, recursive=True) Directory(params.etc_prefix_dir, mode=0755, recursive=True) Directory(params.falcon_conf_dir, owner=params.falcon_user, recursive=True) File(params.falcon_conf_dir + '/falcon-env.sh', content=InlineTemplate(params.falcon_env_sh_template), owner=params.falcon_user) File(params.falcon_conf_dir + '/client.properties', content=Template('client.properties.j2'), mode=0644, owner=params.falcon_user) PropertiesFile(params.falcon_conf_dir + '/runtime.properties', properties=params.falcon_runtime_properties, mode=0644, owner=params.falcon_user) PropertiesFile(params.falcon_conf_dir + '/startup.properties', properties=params.falcon_startup_properties, mode=0644, owner=params.falcon_user) if params.falcon_graph_storage_directory: Directory(params.falcon_graph_storage_directory, owner=params.falcon_user, group=params.user_group, mode=0775, recursive=True, cd_access="a") if params.falcon_graph_serialize_path: Directory(params.falcon_graph_serialize_path, owner=params.falcon_user, group=params.user_group, mode=0775, recursive=True, cd_access="a") if type == 'server': if action == 'config': if params.store_uri[0:4] == "hdfs": params.HdfsDirectory(params.store_uri, action="create_delayed", owner=params.falcon_user, mode=0755) if params.store_uri[0:4] == "file": Directory(params.store_uri[7:], owner=params.falcon_user, recursive=True) params.HdfsDirectory( params.flacon_apps_dir, action="create_delayed", owner=params.falcon_user, mode=0777 #TODO change to proper mode ) if params.falcon_store_uri[0:4] == "hdfs": params.HdfsDirectory(params.falcon_store_uri, action="create_delayed", owner=params.falcon_user, mode=0755) if params.falcon_store_uri[0:4] == "file": Directory(params.falcon_store_uri[7:], owner=params.falcon_user, recursive=True) params.HdfsDirectory(None, action="create") Directory(params.falcon_local_dir, owner=params.falcon_user, recursive=True, cd_access="a") if params.falcon_embeddedmq_enabled == True: Directory(os.path.abspath( os.path.join(params.falcon_embeddedmq_data, "..")), owner=params.falcon_user, recursive=True) Directory(params.falcon_embeddedmq_data, owner=params.falcon_user, recursive=True) if action == 'start': Execute( format('{falcon_home}/bin/falcon-start -port {falcon_port}'), user=params.falcon_user, path=params.hadoop_bin_dir) if action == 'stop': Execute(format('{falcon_home}/bin/falcon-stop'), user=params.falcon_user, path=params.hadoop_bin_dir) File(params.server_pid_file, action='delete')
def hive(name=None):
  import params

  if name == 'hiveserver2':
    params.HdfsDirectory(params.hive_apps_whs_dir,
                         action="create_delayed",
                         owner=params.hive_user,
                         mode=0777)
    params.HdfsDirectory(params.hive_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.hive_user,
                         mode=params.hive_hdfs_user_mode)
    params.HdfsDirectory(None, action="create")

  # We should change configurations for the client as well as for the server.
  # The reason is that stale-configs are service-level, not component-level.
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  if name == 'metastore' or name == 'hiveserver2':
    jdbc_connector()

    environment = {"no_proxy": format("{ambari_server_hostname}")}

    # download the DB connection check jar from the Ambari server if it is
    # not already present on the agent
    cmd = format("/bin/sh -c 'cd /usr/lib/ambari-agent/ && curl -kf -x \"\" "
                 "--retry 5 "
                 "{jdk_location}{check_db_connection_jar_name} "
                 "-o {check_db_connection_jar_name}'")

    Execute(cmd,
            not_if=format("[ -f {check_db_connection_jar_name} ]"),
            environment=environment)

  if name == 'metastore':
    File(params.start_metastore_path,
         mode=0755,
         content=StaticFile('startMetastore.sh'))
    if params.init_metastore_schema:
      create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                 "{hive_bin}/schematool -initSchema "
                                 "-dbType {hive_metastore_db_type} "
                                 "-userName {hive_metastore_user_name} "
                                 "-passWord {hive_metastore_user_passwd!p}")

      check_schema_created_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                        "{hive_bin}/schematool -info "
                                        "-dbType {hive_metastore_db_type} "
                                        "-userName {hive_metastore_user_name} "
                                        "-passWord {hive_metastore_user_passwd!p}")

      Execute(create_schema_cmd,
              not_if=check_schema_created_cmd)
  elif name == 'hiveserver2':
    File(params.start_hiveserver2_path,
         mode=0755,
         content=Template(format('{start_hiveserver2_script}')))

  if name != "client":
    crt_directory(params.hive_pid_dir)
    crt_directory(params.hive_log_dir)
    crt_directory(params.hive_var_lib)
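For context, functions like hive() are normally driven from a component's Script subclass rather than called directly. A minimal sketch of such a wrapper, assuming the usual Ambari agent conventions; the class name, module layout, and import of hive are illustrative and not taken from this document:

from resource_management import *
from hive import hive

class HiveMetastore(Script):
  def install(self, env):
    # install the packages declared in the service metainfo, then configure
    self.install_packages(env)
    self.configure(env)

  def configure(self, env):
    import params
    env.set_params(params)
    # render configs and create the local/HDFS directories for the metastore
    hive(name='metastore')

if __name__ == "__main__":
  HiveMetastore().execute()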
def webhcat():
  import params

  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, "2.2.0.0") < 0:
    params.HdfsDirectory(params.webhcat_apps_dir,
                         action="create_delayed",
                         owner=params.webhcat_user,
                         mode=0755)

  if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
    params.HdfsDirectory(params.hcat_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.hcat_user,
                         mode=params.hcat_hdfs_user_mode)
  params.HdfsDirectory(params.webhcat_hdfs_user_dir,
                       action="create_delayed",
                       owner=params.webhcat_user,
                       mode=params.webhcat_hdfs_user_mode)
  params.HdfsDirectory(None, action="create")

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.config_dir,
            recursive=True,
            owner=params.webhcat_user,
            group=params.user_group)

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin')

  # TODO, these checks that are specific to HDP 2.2 and greater should really
  # be in a script specific to that stack.
  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, "2.2.0.0") >= 0:
    copy_tarballs_to_hdfs('hive', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
    copy_tarballs_to_hdfs('pig', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
    copy_tarballs_to_hdfs('hadoop-streaming', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
    copy_tarballs_to_hdfs('sqoop', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
  else:
    CopyFromLocal(params.hadoop_streeming_jars,
                  owner=params.webhcat_user,
                  mode=0755,
                  dest_dir=params.webhcat_apps_dir,
                  kinnit_if_needed=kinit_if_needed,
                  hdfs_user=params.hdfs_user,
                  hadoop_bin_dir=params.hadoop_bin_dir,
                  hadoop_conf_dir=params.hadoop_conf_dir)

    if (os.path.isfile(params.pig_tar_file)):
      CopyFromLocal(params.pig_tar_file,
                    owner=params.webhcat_user,
                    mode=0755,
                    dest_dir=params.webhcat_apps_dir,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user,
                    hadoop_bin_dir=params.hadoop_bin_dir,
                    hadoop_conf_dir=params.hadoop_conf_dir)

    CopyFromLocal(params.hive_tar_file,
                  owner=params.webhcat_user,
                  mode=0755,
                  dest_dir=params.webhcat_apps_dir,
                  kinnit_if_needed=kinit_if_needed,
                  hdfs_user=params.hdfs_user,
                  hadoop_bin_dir=params.hadoop_bin_dir,
                  hadoop_conf_dir=params.hadoop_conf_dir)

    if (len(glob.glob(params.sqoop_tar_file)) > 0):
      CopyFromLocal(params.sqoop_tar_file,
                    owner=params.webhcat_user,
                    mode=0755,
                    dest_dir=params.webhcat_apps_dir,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user,
                    hadoop_bin_dir=params.hadoop_bin_dir,
                    hadoop_conf_dir=params.hadoop_conf_dir)

  # Replace _HOST with the agent's hostname in the principal-related properties
  webhcat_site = params.config['configurations']['webhcat-site'].copy()
  for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
      webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=webhcat_site,
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group)

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template))

  Directory(params.webhcat_conf_dir,
            cd_access='a',
            recursive=True)

  log4j_webhcat_filename = 'webhcat-log4j.properties'
  if (params.log4j_webhcat_props != None):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=params.log4j_webhcat_props)
  elif (os.path.exists(format("{config_dir}/{log4j_webhcat_filename}.template"))):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template")))
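The _HOST substitution above rewrites Kerberos principal templates so that each agent registers its own host-specific identity before webhcat-site.xml is rendered. A tiny, self-contained illustration of the same replace logic; the sample property values and hostname are made up:

webhcat_site = {
  'templeton.kerberos.principal': 'HTTP/_HOST@EXAMPLE.COM',
  'templeton.hive.properties': 'hive.metastore.sasl.enabled=true,'
                               'hive.metastore.kerberos.principal=hive/_HOST@EXAMPLE.COM',
}
hostname = 'c6402.ambari.apache.org'
for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
  if prop_name in webhcat_site:
    webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", hostname)
# webhcat_site['templeton.kerberos.principal'] is now
# 'HTTP/c6402.ambari.apache.org@EXAMPLE.COM'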
def webhcat():
  import params

  params.HdfsDirectory(params.webhcat_apps_dir,
                       action="create_delayed",
                       owner=params.webhcat_user,
                       mode=0755)
  if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
    params.HdfsDirectory(params.hcat_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.hcat_user,
                         mode=params.hcat_hdfs_user_mode)
  params.HdfsDirectory(params.webhcat_hdfs_user_dir,
                       action="create_delayed",
                       owner=params.webhcat_user,
                       mode=params.webhcat_hdfs_user_mode)
  params.HdfsDirectory(None, action="create")

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.config_dir,
            owner=params.webhcat_user,
            group=params.user_group)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['webhcat-site'],
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group)

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template))

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin')

  CopyFromLocal('/usr/lib/hadoop-mapreduce/hadoop-streaming-*.jar',
                owner=params.webhcat_user,
                mode=0755,
                dest_dir=params.webhcat_apps_dir,
                kinnit_if_needed=kinit_if_needed,
                hdfs_user=params.hdfs_user)
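A recurring idiom throughout these functions is HdfsDirectory(..., action="create_delayed") followed by a single HdfsDirectory(None, action="create"): directory requests are queued and then flushed in one pass, so the kinit and hadoop fs invocations are not repeated per directory. A stripped-down sketch of that accumulate-then-flush pattern, for illustration only; DelayedDirectoryProvider and its method names are assumptions, not the Ambari implementation:

class DelayedDirectoryProvider(object):
  """Illustrative accumulate-then-flush pattern behind create_delayed/create."""

  def __init__(self):
    self.pending = []

  def directory(self, path, action, owner=None, mode=None):
    if action == "create_delayed":
      # remember the request; nothing touches HDFS yet
      self.pending.append((path, owner, mode))
    elif action == "create":
      # flush every queued request in one batch (one kinit, one fs session)
      for p, o, m in self.pending:
        print("hadoop fs -mkdir -p %s && hadoop fs -chown %s %s && hadoop fs -chmod %o %s" % (p, o, p, m, p))
      self.pending = []

provider = DelayedDirectoryProvider()
provider.directory("/apps/webhcat", action="create_delayed", owner="hcat", mode=0755)
provider.directory("/user/hcat", action="create_delayed", owner="hcat", mode=0755)
provider.directory(None, action="create")   # single flush for both directories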