def start_datanode(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_datanode(namenode.namenodes()[0], namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs.start_datanode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')

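# The handlers above and below call utils.install_ssh_key(user, key) without
# showing its definition. A minimal sketch of what such a helper typically does
# (append a public key to the user's authorized_keys) is given here for
# illustration only; it is an assumption, not the actual jujubigdata code.
import os
import pwd


def install_ssh_key(user, ssh_key):
    """Append ssh_key to ~user/.ssh/authorized_keys, creating it if needed."""
    pw = pwd.getpwnam(user)
    ssh_dir = os.path.join(pw.pw_dir, '.ssh')
    os.makedirs(ssh_dir, mode=0o700, exist_ok=True)
    auth_keys = os.path.join(ssh_dir, 'authorized_keys')
    # Avoid duplicating the key when the hook runs more than once.
    existing = ''
    if os.path.exists(auth_keys):
        with open(auth_keys) as f:
            existing = f.read()
    if ssh_key.strip() not in existing:
        with open(auth_keys, 'a') as f:
            f.write(ssh_key.strip() + '\n')
    os.chmod(auth_keys, 0o600)
    os.chown(ssh_dir, pw.pw_uid, pw.pw_gid)
    os.chown(auth_keys, pw.pw_uid, pw.pw_gid)
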
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')

def install(self):
    version = hookenv.config()['spark_version']
    spark_path = self.extract_spark_binary('spark-{}'.format(version), version)
    os.symlink(spark_path, self.dist_config.path('spark'))
    unitdata.kv().set('spark.version', version)
    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))
    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)

def install(self, force=False):
    if not force and self.is_installed():
        return
    jujuresources.install(self.resources['spark'],
                          destination=self.dist_config.path('spark'),
                          skip_top_level=True)
    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))
    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)

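# The force/is_installed() guard above makes install() idempotent. is_installed()
# is not shown in these snippets; a plausible sketch, assuming it simply reads
# back the 'spark.installed' flag written at the end of install(), is below.
# In the charm it would be a method on the same class; this is illustrative,
# not the charm's actual code.
from charmhelpers.core import unitdata


def is_installed():
    """Return True if a previous install() run recorded completion."""
    return unitdata.kv().get('spark.installed', False)
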
def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    namenode_data = (
        namenode.clustername(),
        namenode.namenodes(),
        namenode.port(),
        namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        # re-check the started state, since this handler may also be invoked manually
        if is_state('datanode.started'):
            hdfs.restart_datanode()
            hdfs.restart_journalnode()
    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())

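# data_changed() above gates reconfiguration and restarts on whether the
# namenode data actually differs from what was last applied. An illustrative
# sketch of that pattern (hash the serialized data and compare it against a
# value cached in unitdata) is shown below; it approximates, but is not, the
# real charms.reactive.helpers.data_changed implementation.
import hashlib
import json

from charmhelpers.core import unitdata


def data_changed(data_id, data):
    """Return True if `data` differs from the value last seen under `data_id`."""
    key = 'example.data_changed.{}'.format(data_id)
    new_hash = hashlib.md5(
        json.dumps(data, sort_keys=True, default=str).encode('utf8')).hexdigest()
    old_hash = unitdata.kv().get(key)
    unitdata.kv().set(key, new_hash)
    return old_hash != new_hash
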
def install(self):
    version = hookenv.config()['spark_version']
    spark_path = self.extract_spark_binary('spark-{}'.format(version), version)
    os.symlink(spark_path, self.dist_config.path('spark'))
    unitdata.kv().set('spark.version', version)
    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    hostname = hookenv.local_unit().replace('/', '-')
    etc_hostname = Path('/etc/hostname')
    etc_hostname.write_text(hostname)
    check_call(['hostname', '-F', etc_hostname])
    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)

def install(self):
    '''install Spark and add dependencies in dist-config'''
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    jujuresources.install(self.resources['spark'],
                          destination=self.dist_config.path('spark'),
                          skip_top_level=True)

    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

    # put the spark jar in hdfs
    spark_assembly_jar = glob(
        '{}/lib/spark-assembly-*.jar'.format(self.dist_config.path('spark'))
    )[0]
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/share/lib')
    try:
        utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                     '/user/ubuntu/share/lib/spark-assembly.jar')
    except CalledProcessError:
        hookenv.log("File exists")

    # create hdfs storage space for history server
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/directory')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/directory')

    # create hdfs storage space for spark-bench
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/spark-bench')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/spark-bench')

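# The HDFS bootstrap above shells out through utils.run_as(user, *cmd) so the
# dfs commands execute as the 'hdfs' user rather than root, and so failures
# surface as CalledProcessError (caught above for the already-present jar).
# That helper is not defined in these snippets; a minimal sketch, assuming a
# simple sudo-based wrapper, is shown below (illustrative, not the jujubigdata
# implementation).
from subprocess import check_call


def run_as(user, *command):
    """Run `command` as `user`; raises CalledProcessError on a non-zero exit."""
    # e.g. run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/ubuntu/share/lib')
    check_call(['sudo', '-H', '-u', user] + list(command))
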
def install_ssh_keys(self):
    unit, data = any_ready_unit(self.relation_name)
    ssh_key = data['ssh-key']
    utils.install_ssh_key(self.ssh_user, ssh_key)