class PHDRpmUtil(HadoopUtil):
    """Utility for installing PHD single-node clusters using RPMs"""

    def __init__(self, hadoop_artifact_url, hadoop_install_dir, hadoop_data_dir,
                 template_conf_dir, hostname='localhost', secure_hadoop=False):
        HadoopUtil.__init__(self, hadoop_artifact_url, hadoop_install_dir,
                            hadoop_data_dir, hostname)
        self.rpmutil = RPMUtil()
        self.hostname = hostname
        self.hadoop_artifact_url = hadoop_artifact_url
        self.hadoop_install_dir = hadoop_install_dir
        self.hadoop_binary_loc = ''
        self.hadoop_data_dir = hadoop_data_dir
        self.template_conf_dir = template_conf_dir
        self.secure_hadoop = secure_hadoop

        # Constants
        # Both of the directories below are expected to be present under the
        # hadoop template configuration directory.
        self.SECURE_DIR_NAME = "conf.secure"        # secure configuration files location
        self.NON_SECURE_DIR_NAME = "conf.pseudo"    # non-secure configuration files location
        self.DEPENDENCY_PKGS = [
            "fuse-",      # eg. fuse-2.8.3-4.el6.x86_64
            "fuse-libs",  # eg. fuse-libs-2.8.3-4.el6.x86_64
            "nc-"         # eg. nc-1.84-22.el6.x86_64
        ]
        self.HADOOP_UTILITY_RPMS = "utility/rpm/"
        self.ZOOKEEPER_RPMS = "zookeeper/rpm/"
        self.HADOOP_RPMS = "hadoop/rpm/"
        self.HADOOP_ENVS = {
            "HADOOP_HOME": "/usr/lib/gphd/hadoop/",
            "HADOOP_COMMON_HOME": "/usr/lib/gphd/hadoop/",
            "HADOOP_HDFS_HOME": "/usr/lib/gphd/hadoop-hdfs/",
            "HADOOP_MAPRED_HOME": "/usr/lib/gphd/hadoop-mapreduce/",
            "YARN_HOME": "/usr/lib/gphd/hadoop-yarn/",
            "HADOOP_TMP_DIR": "%s/hadoop-hdfs/cache/" % self.hadoop_data_dir,
            "MAPRED_TMP_DIR": "%s/hadoop-mapreduce/cache/" % self.hadoop_data_dir,
            "YARN_TMP_DIR": "%s/hadoop-yarn/cache/" % self.hadoop_data_dir,
            "HADOOP_CONF_DIR": "/etc/hadoop/conf",
            "HADOOP_LOG_DIR": "%s/hadoop-logs/hadoop-hdfs" % self.hadoop_data_dir,
            "MAPRED_LOG_DIR": "%s/hadoop-logs/hadoop-mapreduce" % self.hadoop_data_dir,
            "YARN_LOG_DIR": "%s/hadoop-logs/hadoop-yarn" % self.hadoop_data_dir
        }
        self.PKGS_TO_REMOVE = "^hadoop-*|^bigtop-*|^zookeeper-*|^parquet-*"

    def _get_hadoop_conf_dir(self):
        """
        Gets the hadoop configuration directory location
        """
        cmd_str = "find /etc/gphd/ -name conf | egrep -v \"zookeeper|httpfs\""
        res = {}
        if run_shell_command(cmd_str, "Find HADOOP_CONF_DIR", res):
            return res['stdout'].split('\n')[0]

    def _remove_installed_pkgs(self):
        self.rpmutil.erase_all_packages(self.PKGS_TO_REMOVE)

    def _install_dependency_pkgs(self):
        for pkg in self.DEPENDENCY_PKGS:
            if not self.rpmutil.is_pkg_installed("^" + pkg):
                self.rpmutil.install_package_using_yum(pkg, is_regex_pkg_name=True)

    def cleanup(self):
        """
        Clean-up process to:
        1. Kill any hadoop daemon processes left over from previous runs
        2. Remove the contents of the hadoop installation & configuration locations
        """
        self.stop_hadoop()
        cmd_str = "ps aux | awk '/\-Dhadoop/{print $2}' | xargs sudo kill -9"
        run_shell_command(cmd_str, "Kill zombie hadoop daemons")
        cmd_str = "sudo rm -rf "
        for value in self.HADOOP_ENVS.itervalues():
            cmd_str = cmd_str + value + "* "
        cmd_str = cmd_str + "/etc/gphd"
        run_shell_command(cmd_str, "Clean up HDFS files")
        self._remove_installed_pkgs()

    def _create_symlinks(self, lib_dir, symlink):
        res = {}
        cmd_str = "sudo find %s -name \"%s*\"" % (lib_dir, symlink)
        run_shell_command(cmd_str, "Check for %s symlink" % symlink, res)
        result = res['stdout']
        if result:
            result = result.splitlines()
            if len(result) == 1:
                cmd_str = "cd %s; sudo ln -s %s %s" % (lib_dir, result[0], symlink)
                run_shell_command(cmd_str, "Create %s symlink" % symlink)

    def install_binary(self):
        """
        Installs RPM binaries of:
        1. utility (eg. bigtop utils)
        2. zookeeper
        3. hadoop
        """
        self._install_dependency_pkgs()

        # install utility rpms
        hadoop_utility_rpms_loc = os.path.join(self.hadoop_binary_loc, self.HADOOP_UTILITY_RPMS)
        self.rpmutil.install_rpms_from(hadoop_utility_rpms_loc)

        # install zookeeper rpms
        zookeeper_rpms_loc = os.path.join(self.hadoop_binary_loc, self.ZOOKEEPER_RPMS)
        self.rpmutil.install_rpms_from(zookeeper_rpms_loc)

        # install hadoop rpms
        hadoop_rpms_loc = os.path.join(self.hadoop_binary_loc, self.HADOOP_RPMS)
        self.rpmutil.install_rpms_from(hadoop_rpms_loc)

        # create hadoop symlinks inside /var/lib/gphd
        lib_dir = "/var/lib/gphd"
        self._create_symlinks(lib_dir, "hadoop-hdfs")
        self._create_symlinks(lib_dir, "hadoop-yarn")
        self._create_symlinks(lib_dir, "hadoop-mapreduce")
        self._create_symlinks(lib_dir, "zookeeper")

    def install_hadoop_configurations(self):
        """
        Based on the type of installation (secure or non-secure), installs the
        updated template configuration files and makes the required changes to
        the env files.
        """
        ## TODO: Create separate directories for secure & non-secure in the hadoop
        ## conf dir and copy the updated configs into the respective directories
        self.HADOOP_ENVS['HADOOP_CONF_DIR'] = self._get_hadoop_conf_dir()

        # check the type of hadoop installation - secure or non-secure
        if self.secure_hadoop:
            # SECURE_DIR_NAME is expected to be present under the template configuration directory
            secure_conf = os.path.join(self.template_conf_dir, self.SECURE_DIR_NAME)
            super(PHDRpmUtil, self).install_hadoop_configurations(
                secure_conf, self.HADOOP_ENVS['HADOOP_CONF_DIR'])

            # update env files in /etc/default/hadoop*
            if self.hadoop_data_dir.endswith('/'):
                self.hadoop_data_dir = self.hadoop_data_dir[:-1]
            cmd_str = "for env_file in `ls /etc/default/hadoop*`;" \
                      "do " \
                      "sudo sed -r -i 's:\/var\/log(\/gphd)?:\%s\/hadoop-logs:g' ${env_file};" \
                      "done" % self.hadoop_data_dir
            run_shell_command(cmd_str, "Update env files in /etc/default/hadoop*")

            # update hadoop-env.sh file
            hadoop_env_file = os.path.join(self.HADOOP_ENVS['HADOOP_CONF_DIR'], "hadoop-env.sh")
            if not os.path.exists(hadoop_env_file):
                tinctest.logger.info("hadoop-env.sh not found. Creating a new one!")
                run_shell_command("sudo touch %s" % hadoop_env_file, "Create hadoop-env.sh file")
            # give write permissions on the file
            self.give_others_write_perm(hadoop_env_file)
            text = "\n### Added env variables\n" \
                   "export JAVA_HOME=%s\n" \
                   "export HADOOP_OPTS=\"-Djava.net.preferIPv4Stack=true " \
                   "-Djava.library.path=$HADOOP_HOME/lib/native/\"\n" % self.get_java_home()
            self.append_text_to_file(hadoop_env_file, text)
            # revert back to the old permissions
            self.remove_others_write_perm(hadoop_env_file)

            # update the env files hadoop-hdfs-datanode & hadoop
            hdfs_datanode_env = "/etc/default/hadoop-hdfs-datanode"
            hdfs_hadoop_env = "/etc/default/hadoop"
            self.give_others_write_perm(hdfs_datanode_env)
            text = "\n### Secure env variables\n" \
                   "export HADOOP_SECURE_DN_USER=hdfs\n" \
                   "export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/hdfs\n" \
                   "export HADOOP_PID_DIR=/var/run/gphd/hadoop-hdfs/\n" \
                   "export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}\n"
            self.append_text_to_file(hdfs_datanode_env, text)
            self.remove_others_write_perm(hdfs_datanode_env)
            self.give_others_write_perm(hdfs_hadoop_env)
            self.append_text_to_file(hdfs_hadoop_env, "export JSVC_HOME=/usr/libexec/bigtop-utils\n")
            self.remove_others_write_perm(hdfs_hadoop_env)

            # change the permissions of container-executor
            container_bin_path = os.path.join(self.HADOOP_ENVS['YARN_HOME'], 'bin/container-executor')
            cmd_str = "sudo chown root:yarn %s" % container_bin_path
            run_shell_command(cmd_str)
            cmd_str = "sudo chmod 050 %s" % container_bin_path
            run_shell_command(cmd_str)
            cmd_str = "sudo chmod u+s %s" % container_bin_path
            run_shell_command(cmd_str)
            cmd_str = "sudo chmod g+s %s" % container_bin_path
            run_shell_command(cmd_str)
        else:
            # NON_SECURE_DIR_NAME is expected to be present under the template configuration directory
            non_secure_conf = os.path.join(self.template_conf_dir, self.NON_SECURE_DIR_NAME)
            super(PHDRpmUtil, self).install_hadoop_configurations(
                non_secure_conf, self.HADOOP_ENVS['HADOOP_CONF_DIR'])

    def start_hdfs(self):
        # format namenode
        cmd_str = "sudo -u hdfs hdfs --config %s namenode -format" % self.HADOOP_ENVS['HADOOP_CONF_DIR']
        namenode_formatted = run_shell_command(cmd_str)
        if not namenode_formatted:
            raise Exception("Exception in namenode formatting")

        # start namenode
        cmd_str = "sudo /etc/init.d/hadoop-hdfs-namenode start"
        namenode_started = run_shell_command(cmd_str)
        if not namenode_started:
            raise Exception("Namenode not started")

        # start datanode
        cmd_str = "sudo /etc/init.d/hadoop-hdfs-datanode start"
        datanode_started = run_shell_command(cmd_str)
        if not datanode_started:
            raise Exception("Datanode not started")

        # start secondary namenode
        cmd_str = "sudo /etc/init.d/hadoop-hdfs-secondarynamenode start"
        secondarynamenode_started = run_shell_command(cmd_str)
        if not secondarynamenode_started:
            raise Exception("Secondary namenode not started")

    def set_hdfs_permissions(self):
        if self.secure_hadoop:
            hdfs_cmd = "sudo hdfs dfs"
        else:
            hdfs_cmd = "sudo -u hdfs hdfs dfs"

        # set hdfs permissions
        cmd_str = "%s -chmod -R 777 /" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -mkdir /tmp" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -chmod 1777 /tmp" % hdfs_cmd
        run_shell_command(cmd_str)
        # cmd_str = "%s -mkdir -p /var/log/gphd/hadoop-yarn" % hdfs_cmd
        # run_shell_command(cmd_str)
        cmd_str = "%s -mkdir /user" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -chmod 777 /user" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -mkdir /user/history" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -chown mapred:hadoop /user/history" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -chmod 1777 -R /user/history" % hdfs_cmd
        run_shell_command(cmd_str)
        cmd_str = "%s -ls -R /" % hdfs_cmd
        run_shell_command(cmd_str)

    def put_file_in_hdfs(self, input_path, hdfs_path):
        if hdfs_path.rfind('/') > 0:
            hdfs_dir = hdfs_path[:hdfs_path.rfind('/')]
            cmd_str = "hdfs dfs -mkdir -p %s" % hdfs_dir
            run_shell_command(cmd_str, "Creating parent HDFS dir for path %s" % input_path)
        cmd_str = "hdfs dfs -put %s %s" % (input_path, hdfs_path)
        run_shell_command(cmd_str, "Copy to HDFS : file %s" % input_path)

    def remove_file_from_hdfs(self, hdfs_path):
        cmd_str = "hdfs dfs -rm -r %s" % hdfs_path
        run_shell_command(cmd_str, "Remove %s from HDFS" % hdfs_path)

    def start_yarn(self):
        # start yarn daemons
        self.set_hdfs_permissions()

        # start resource manager
        cmd_str = "sudo /etc/init.d/hadoop-yarn-resourcemanager start"
        resourcemanager_started = run_shell_command(cmd_str)
        if not resourcemanager_started:
            raise Exception("Resource manager not started")

        # start node manager
        cmd_str = "sudo /etc/init.d/hadoop-yarn-nodemanager start"
        nodemanager_started = run_shell_command(cmd_str)
        if not nodemanager_started:
            raise Exception("Node manager not started")

        # start history server
        cmd_str = "sudo /etc/init.d/hadoop-mapreduce-historyserver start"
        historyserver_started = run_shell_command(cmd_str)
        if not historyserver_started:
            raise Exception("History server not started")

    def start_hadoop(self):
        """
        Starts the PHD cluster and checks the JPS status
        """
        self.start_hdfs()
        self.start_yarn()
        res = {}
        # run jps command & check for hadoop daemons
        cmd_str = "sudo jps"
        run_shell_command(cmd_str, "Check Hadoop Daemons", res)
        result = res['stdout']
        tinctest.logger.info("\n**** Following Hadoop Daemons started **** \n%s" % result)
        tinctest.logger.info("*** Hadoop Started Successfully!!")

    def stop_hadoop(self):
        """
        Stops the PHD cluster
        """
        run_shell_command("sudo /etc/init.d/hadoop-mapreduce-historyserver stop", "Stop history server")
        run_shell_command("sudo /etc/init.d/hadoop-yarn-nodemanager stop", "Stop node manager")
        run_shell_command("sudo /etc/init.d/hadoop-yarn-resourcemanager stop", "Stop resource manager")
        run_shell_command("sudo /etc/init.d/hadoop-hdfs-secondarynamenode stop", "Stop secondary namenode")
        run_shell_command("sudo /etc/init.d/hadoop-hdfs-datanode stop", "Stop datanode")
        run_shell_command("sudo /etc/init.d/hadoop-hdfs-namenode stop", "Stop namenode")

    def get_hadoop_env(self):
        """
        Returns a dictionary of hadoop environment variables like:
        1. HADOOP_HOME
        2. HADOOP_CONF_DIR
        3. HADOOP_COMMON_HOME
        4. HADOOP_HDFS_HOME
        5. YARN_HOME
        6. HADOOP_MAPRED_HOME
        """
        return self.HADOOP_ENVS

    def init_cluster(self):
        """
        Entry point for starting up the PHD cluster
        """
        self.download_binary_and_untar()
        self.cleanup()
        self.install_binary()
        self.install_hadoop_configurations()
        self.start_hadoop()