def start_resourcemanager(hdfs_rel):
    # note: originally (mis)named configure_hdfs; the body starts the
    # ResourceManager and JobHistory services, so it is renamed to match
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.start_resourcemanager()
    yarn.start_jobhistory()
    hadoop.open_ports('resourcemanager')
    set_state('resourcemanager.started')

def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # relation data is unexpectedly missing namenodes; dump it to the
        # unit log to aid debugging
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                }
                for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')

def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')

def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        # relation data is unexpectedly missing resourcemanagers; dump it
        # to the unit log to aid debugging
        data = yaml.dump({
            'relation_name': resourcemanager.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in resourcemanager.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                }
                for rid in hookenv.relation_ids(resourcemanager.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')

def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # relation data is unexpectedly missing namenodes; dump it to the
        # unit log to aid debugging
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                }
                for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')

def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')

def start_datanode(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    update_config(namenode)  # force config update
    hdfs.start_datanode()
    hdfs.start_journalnode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')

def update_zk_config(zookeeper):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    zk_nodes = sorted(zookeeper.zookeepers(), key=itemgetter('host'))
    zk_started = is_state('namenode.zk.started')
    hdfs.configure_zookeeper(zk_nodes)
    if zk_started and data_changed('namenode.zk', zk_nodes):
        hdfs.restart_zookeeper()

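# A minimal illustration (not part of this charm) of the data_changed()
# idiom used above and throughout these handlers: charms.reactive hashes
# the value into the unit's key/value store and returns True only when it
# differs from the previous invocation, so expensive restarts happen only
# on real changes. The key 'example.zk-nodes' and restart_fn are
# hypothetical names for this sketch.
from charms.reactive.helpers import data_changed

def restart_if_changed(restart_fn, zk_nodes):
    if data_changed('example.zk-nodes', zk_nodes):
        restart_fn()
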
def hdfs_departed():
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    hadoop.close_ports('resourcemanager')
    yarn.stop_jobhistory()
    yarn.stop_resourcemanager()
    remove_state('resourcemanager.started')
    remove_state('resourcemanager.ready')

def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    datanode.send_spec(hadoop.spec())
    datanode.send_clustername(hookenv.service_name())
    datanode.send_namenodes(get_cluster_nodes())
    datanode.send_ports(hdfs_port, webhdfs_port)

def configure_resourcemanager():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_resourcemanager()
    yarn.configure_jobhistory()
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('resourcemanager.configured')

def update_slaves(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.reload_slaves()
    set_state('namenode.ready')

def accept_clients(clients):
    hadoop = get_hadoop_base()
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    clients.send_spec(hadoop.spec())
    clients.send_namenodes([local_hostname])
    clients.send_ports(hdfs_port, webhdfs_port)
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)

def configure_namenode():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(get_cluster_nodes())
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')

def accept_clients(clients):
    hadoop = get_hadoop_base()
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    clients.send_spec(hadoop.spec())
    clients.send_clustername(hookenv.service_name())
    clients.send_namenodes(get_cluster_nodes())
    clients.send_ports(hdfs_port, webhdfs_port)
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)

def start_zookeeper(zookeeper):
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't run zkfc on a non-cluster node
        return
    update_zk_config(zookeeper)  # ensure config is up to date
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.restart_namenode()
    hdfs.start_zookeeper()
    set_state('namenode.zk.started')

def accept_clients(clients):
    hadoop = get_hadoop_base()
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    clients.send_spec(hadoop.spec())
    clients.send_resourcemanagers([local_hostname])
    clients.send_ports(port, hs_http, hs_ipc)
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)

def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')

def configure_namenode():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')

def ganglia_changed():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    yarn = YARN(hadoop)
    if is_state('namenode.started'):
        hdfs.restart_namenode()
    if is_state('datanode.started'):
        hdfs.restart_datanode()
    if is_state('journalnode.started'):
        hdfs.restart_journalnode()
    if is_state('resourcemanager.started'):
        yarn.restart_resourcemanager()
    if is_state('nodemanager.started'):
        yarn.restart_nodemanager()

def init_ha_active(datanode, cluster):
    """
    Do initial HA setup on the leader.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_namenode()
    remove_state('namenode.started')
    # initial cluster is us (active) plus a standby
    set_cluster_nodes([local_hostname, cluster.nodes()[0]])
    update_ha_config(datanode)
    hdfs.init_sharededits()
    hdfs.start_namenode()
    leadership.leader_set({'ha-initialized': 'true'})
    set_state('namenode.started')

def update_ha_config(datanode):
    cluster_nodes = get_cluster_nodes()
    jn_nodes = sorted(datanode.nodes())
    jn_port = datanode.jn_port()
    started = is_state('namenode.started')
    new_cluster_config = data_changed('namenode.cluster-nodes', cluster_nodes)
    new_jn_config = data_changed('namenode.jn.config', (jn_nodes, jn_port))

    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(cluster_nodes)
    hdfs.register_journalnodes(jn_nodes, jn_port)

    if started and new_cluster_config:
        hdfs.restart_namenode()
    elif started and new_jn_config:
        hdfs.reload_slaves()  # is this actually necessary?

def init_ha_standby(datanode, cluster):
    """
    Once initial HA setup is done, any new NameNode is started as standby.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't even bootstrapStandby if not in the list of chosen nodes
        return
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    update_ha_config(datanode)  # ensure the config is written
    hdfs.bootstrap_standby()
    hdfs.start_namenode()
    cluster.standby_ready()
    set_state('namenode.standby')
    hadoop.open_ports('namenode')
    set_state('namenode.started')

def unregister_nodemanager(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    slaves = unitdata.kv().get('resourcemanager.slaves', [])
    slaves_leaving = nodemanager.nodes()
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('resourcemanager.slaves', slaves_remaining)
    yarn.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('resourcemanager.ready')

    nodemanager.dismiss()

def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(),
        namenode.namenodes(),
        namenode.port(),
        namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        if is_state('datanode.started'):  # re-check because of manual call
            hdfs.restart_datanode()
            hdfs.restart_journalnode()

    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())

def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)
    hdfs.reload_slaves()

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('namenode.ready')

    datanode.dismiss()

def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()

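# A minimal illustration of the unitdata.kv() persistence used by the
# register/unregister handlers above: charmhelpers' key/value store
# survives across hook invocations, so a previously recorded slave list
# can be reconciled against units that are now departing. The key
# 'example.slaves' is a hypothetical name for this sketch.
from charmhelpers.core import unitdata

def drop_leaving_slaves(leaving):
    kv = unitdata.kv()
    remaining = list(set(kv.get('example.slaves', [])) - set(leaving))
    kv.set('example.slaves', remaining)
    return remaining
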
def handle_legacy_installed_flag():
    hadoop = get_hadoop_base()
    if hadoop.is_installed():
        set_state('hadoop.installed')

def set_yarn_spec(resourcemanager):
    hadoop = get_hadoop_base()
    resourcemanager.set_local_spec(hadoop.spec())

def set_hdfs_spec(namenode):
    hadoop = get_hadoop_base()
    namenode.set_local_spec(hadoop.spec())

def install_hadoop():
    hadoop = get_hadoop_base()
    hadoop.install()
    set_state('hadoop.installed')

def stop_nodemanager():
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.stop_nodemanager()
    hadoop.close_ports('nodemanager')
    remove_state('nodemanager.started')

def stop_zookeeper():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_zookeeper()
    remove_state('namenode.zk.started')

def format_zookeeper(zookeeper):
    update_zk_config(zookeeper)  # ensure config is up to date
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.format_zookeeper()
    leadership.leader_set({'zk-formatted': 'true'})

def fetch_resources():
    hadoop = get_hadoop_base()
    if hadoop.verify_resources():
        set_state('resources.available')

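# A minimal sketch of how handlers like fetch_resources() and
# install_hadoop() are typically chained with charms.reactive decorators,
# so that set_state()/remove_state() drive the next handler in line. The
# @when/@when_not states shown here are illustrative assumptions; the
# charm's actual decorators are not shown in this file.
from charms.reactive import when, when_not, set_state

@when_not('resources.available')
def example_fetch_resources():
    hadoop = get_hadoop_base()
    if hadoop.verify_resources():
        set_state('resources.available')

@when('resources.available')
@when_not('hadoop.installed')
def example_install_hadoop():
    hadoop = get_hadoop_base()
    hadoop.install()
    set_state('hadoop.installed')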