# Imports these handlers need; the paths for the core libraries are standard,
# while get_hadoop_base() comes from the hadoop-base charm layer and the
# cluster-node helpers from the surrounding module (neither shown here).
import yaml
from operator import itemgetter

from charmhelpers.core import hookenv, unitdata
from charms import leadership
from charms.reactive import is_state, remove_state, set_state
from charms.reactive.helpers import data_changed
from jujubigdata import utils
from jujubigdata.handlers import HDFS, YARN


def send_info(datanode):
    # Variant where datanode.nodes() yields {'ip': ..., 'host': ...} dicts.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='' if len(slaves) == 1 else 's',  # pluralize 0 and 2+ correctly
    ))
    set_state('namenode.ready')

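# utils here is jujubigdata.utils: update_kv_hosts() records ip -> hostname
# entries in the unit's kv store and manage_etc_hosts() rewrites /etc/hosts
# from that map, so each unit can resolve its peers by the hostnames
# exchanged over the relation. (A summary of how these handlers use the
# calls, not a full description of the jujubigdata API.)
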
def configure_hdfs(namenode):
    # Pre-HA variant: configures against a single namenode host and port.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # Dump the relation state for debugging before the IndexError
        # below makes the hook fail.
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                }
                for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')

def send_info(datanode):
    # Variant where datanode.nodes() yields hostnames directly and the
    # relation provides a ready-made hosts map.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.refresh_slaves()

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='' if len(slaves) == 1 else 's',  # pluralize 0 and 2+ correctly
    ))
    set_state('namenode.ready')

def stop_datanode():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_datanode()
    hdfs.stop_journalnode()
    hadoop.close_ports('datanode')
    remove_state('datanode.started')

def configure_hdfs(namenode):
    # HA-aware variant: passes the cluster name and the full list of
    # namenodes rather than a single host.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # Dump the relation state for debugging before configuration fails.
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                }
                for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')

def update_zk_config(zookeeper):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    # Sort for a stable ordering so data_changed() only fires on real changes.
    zk_nodes = sorted(zookeeper.zookeepers(), key=itemgetter('host'))
    zk_started = is_state('namenode.zk.started')
    hdfs.configure_zookeeper(zk_nodes)
    if zk_started and data_changed('namenode.zk', zk_nodes):
        hdfs.restart_zookeeper()

def start_datanode(namenode):
    # HA variant: also runs a JournalNode alongside the DataNode.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    update_config(namenode)  # force config update
    hdfs.start_datanode()
    hdfs.start_journalnode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')

def update_slaves(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.reload_slaves()
    set_state('namenode.ready')

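# data_changed() (from charms.reactive.helpers) is the guard used throughout
# these handlers: it hashes the given value, compares it to the hash stored
# under that key in the unit's kv store, records the new hash, and returns
# True only when the value differs from the previous call (or on the first
# call). A minimal illustration (the key name here is made up):
#
#     from charms.reactive.helpers import data_changed
#
#     if data_changed('example.slaves', ['dn-0', 'dn-1']):
#         ...  # runs the first time, skipped on identical repeat calls
#
# Since the check is hash-based, ordering matters, which is why
# update_zk_config() sorts its node list before comparing.
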
def start_datanode(namenode):
    # Pre-HA variant: configures against a single namenode host/port pair.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_datanode(namenode.namenodes()[0], namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs.start_datanode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')

def start_zookeeper(zookeeper):
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't run zkfc on a non-cluster node
        return
    update_zk_config(zookeeper)  # ensure config is up to date
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.restart_namenode()
    hdfs.start_zookeeper()
    set_state('namenode.zk.started')

def configure_namenode():
    # Pre-HA variant: single NameNode that registers its own address in the
    # kv hosts map.
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')

def configure_namenode():
    # HA variant: configures the full set of chosen cluster nodes.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(get_cluster_nodes())
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')

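# get_cluster_nodes() / set_cluster_nodes() come from the surrounding charm
# module and are not shown in this section. A minimal sketch, assuming the
# chosen NameNode hostnames are shared through Juju leadership data so that
# the leader's choice in init_ha_active() is visible to the standby units
# (the key name and JSON encoding are assumptions):
#
#     import json
#
#     def set_cluster_nodes(nodes):
#         # only the leader may write leadership data
#         leadership.leader_set({'cluster-nodes': json.dumps(sorted(nodes))})
#
#     def get_cluster_nodes():
#         return json.loads(hookenv.leader_get('cluster-nodes') or '[]')
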
def init_ha_standby(datanode, cluster):
    """
    Once initial HA setup is done, any new NameNode is started as standby.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't even bootstrapStandby if not in the list of chosen nodes
        return
    update_ha_config(datanode)  # ensure the config is written
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.bootstrap_standby()
    hdfs.start_namenode()
    cluster.standby_ready()
    set_state('namenode.standby')
    hadoop.open_ports('namenode')
    set_state('namenode.started')

def unregister_datanode(datanode):
    # Variant where datanode.nodes() yields hostnames directly.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)
    hdfs.reload_slaves()

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('namenode.ready')

    datanode.dismiss()

def update_ha_config(datanode):
    cluster_nodes = get_cluster_nodes()
    jn_nodes = sorted(datanode.nodes())
    jn_port = datanode.jn_port()
    started = is_state('namenode.started')
    new_cluster_config = data_changed('namenode.cluster-nodes', cluster_nodes)
    new_jn_config = data_changed('namenode.jn.config', (jn_nodes, jn_port))

    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(cluster_nodes)
    hdfs.register_journalnodes(jn_nodes, jn_port)

    if started and new_cluster_config:
        hdfs.restart_namenode()
    elif started and new_jn_config:
        hdfs.reload_slaves()  # is this actually necessary?

def unregister_datanode(datanode):
    # Variant where datanode.nodes() yields {'host': ...} dicts and the unit
    # goes to blocked status when no DataNodes remain.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()

def ganglia_changed():
    # Restart whichever services are running so they pick up the changed
    # Ganglia metrics configuration.
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    yarn = YARN(hadoop)
    if is_state('namenode.started'):
        hdfs.restart_namenode()
    if is_state('datanode.started'):
        hdfs.restart_datanode()
    if is_state('journalnode.started'):
        hdfs.restart_journalnode()
    if is_state('resourcemanager.started'):
        yarn.restart_resourcemanager()
    if is_state('nodemanager.started'):
        yarn.restart_nodemanager()

def configure_ha(cluster, datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    cluster_nodes = cluster.nodes()
    jn_nodes = datanode.nodes()
    jn_port = datanode.jn_port()
    if data_changed('namenode.ha', [cluster_nodes, jn_nodes, jn_port]):
        utils.update_kv_hosts(cluster.hosts_map())
        utils.manage_etc_hosts()
        hdfs.register_journalnodes(jn_nodes, jn_port)
        hdfs.restart_namenode()
        datanode.send_namenodes(cluster_nodes)
        if not is_state('namenode.shared-edits.init'):
            hdfs.init_sharededits()
            set_state('namenode.shared-edits.init')

def init_ha_active(datanode, cluster):
    """
    Do initial HA setup on the leader.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_namenode()
    remove_state('namenode.started')
    # initial cluster is us (active) plus a standby
    set_cluster_nodes([local_hostname, cluster.nodes()[0]])
    update_ha_config(datanode)
    hdfs.init_sharededits()
    hdfs.start_namenode()
    leadership.leader_set({'ha-initialized': 'true'})
    set_state('namenode.started')

def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(),
        namenode.namenodes(),
        namenode.port(),
        namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        # re-checked here because update_config() is also called manually
        # (e.g. from start_datanode())
        if is_state('datanode.started'):
            hdfs.restart_datanode()
            hdfs.restart_journalnode()

    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())

def stop_zookeeper():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_zookeeper()
    remove_state('namenode.zk.started')

def format_zookeeper(zookeeper):
    update_zk_config(zookeeper)  # ensure config is up to date
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.format_zookeeper()
    leadership.leader_set({'zk-formatted': 'true'})

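# These handlers are normally wired up with charms.reactive decorators, which
# are not shown in this section. A minimal sketch of how format_zookeeper()
# might be registered (the state and relation names are assumptions):
#
#     from charms.reactive import when, when_not
#
#     @when('zookeeper.ready')
#     @when_not('leadership.set.zk-formatted')
#     def format_zookeeper(zookeeper):
#         ...
#
# The leadership layer exposes leader settings as reactive states of the form
# 'leadership.set.<key>', which pairs naturally with the leader_set() call
# above.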