def send_info(datanode):
    """Publish this NameNode's connection details to related DataNodes.

    Sends the Hadoop spec, the Juju service name (used as the cluster
    name), the chosen cluster nodes, and the HDFS / WebHDFS ports.
    """
    base = get_hadoop_base()
    port_of = base.dist_config.port
    datanode.send_spec(base.spec())
    datanode.send_clustername(hookenv.service_name())
    datanode.send_namenodes(get_cluster_nodes())
    datanode.send_ports(port_of('namenode'), port_of('nn_webapp_http'))
def start_zookeeper(zookeeper):
    """Start the ZKFC daemon now that ZooKeeper is available.

    Only the chosen cluster nodes run zkfc; every other unit is a no-op.
    The NameNode is restarted first so it picks up the refreshed
    ZooKeeper configuration.
    """
    unit_host = hookenv.local_unit().replace('/', '-')
    if unit_host in get_cluster_nodes():
        # make sure the ZK-related config is current before (re)starting
        update_zk_config(zookeeper)
        hdfs = HDFS(get_hadoop_base())
        hdfs.restart_namenode()
        hdfs.start_zookeeper()
        set_state('namenode.zk.started')
    # else: can't run zkfc on a non-cluster node
def configure_namenode():
    """Perform first-time NameNode setup.

    Writes the NameNode configuration for the chosen cluster nodes,
    formats HDFS, starts the daemon, creates the standard HDFS
    directories, opens the service ports, and seeds /etc/hosts
    management before flagging the unit as started.
    """
    base = get_hadoop_base()
    namenode = HDFS(base)
    namenode.configure_namenode(get_cluster_nodes())
    namenode.format_namenode()
    namenode.start_namenode()
    namenode.create_hdfs_dirs()
    base.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')
def accept_clients(clients):
    """Publish connection details to related HDFS clients.

    Shares the Hadoop spec, cluster name, chosen NameNodes, service
    ports, and the hosts map, then signals readiness.
    """
    base = get_hadoop_base()
    port_of = base.dist_config.port
    clients.send_spec(base.spec())
    clients.send_clustername(hookenv.service_name())
    clients.send_namenodes(get_cluster_nodes())
    clients.send_ports(port_of('namenode'), port_of('nn_webapp_http'))
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)
def update_ha_config(datanode):
    """Rewrite HA-related NameNode config and bounce services as needed.

    Always re-registers the JournalNode set and rewrites the NameNode
    config; when the unit is already started, a cluster-membership
    change triggers a NameNode restart while a JournalNode-only change
    triggers a slaves reload.
    """
    cluster = get_cluster_nodes()
    journalnodes = sorted(datanode.nodes())
    port = datanode.jn_port()
    started = is_state('namenode.started')
    # both data_changed() calls record the new values, so always make both
    cluster_changed = data_changed('namenode.cluster-nodes', cluster)
    jn_changed = data_changed('namenode.jn.config', (journalnodes, port))

    hdfs = HDFS(get_hadoop_base())
    hdfs.configure_namenode(cluster)
    hdfs.register_journalnodes(journalnodes, port)

    if started:
        if cluster_changed:
            hdfs.restart_namenode()
        elif jn_changed:
            hdfs.reload_slaves()  # is this actually necessary?
def report_status(datanode):
    """Set the unit's workload status line.

    Summarizes the DataNode count plus HA health: standalone vs. HA,
    this unit's role, the fail-over mode, and -- when degraded -- which
    pieces of the HA deployment are missing.

    Fix: pluralization now uses ``num_slaves != 1`` so that zero slaves
    reads "0 DataNodes" rather than the previous "0 DataNode".
    """
    num_slaves = len(datanode.nodes())
    local_hostname = hookenv.local_unit().replace("/", "-")
    chosen_nodes = get_cluster_nodes()
    # second argument is forwarded to utils.ha_node_state; presumably a
    # timeout/retry knob -- TODO confirm against utils
    cluster_roles = set(utils.ha_node_state(node, 1) for node in chosen_nodes)
    chosen = local_hostname in chosen_nodes
    started = is_state("namenode.started")
    ha = is_state("leadership.set.namenode-ha")
    clustered = is_state("namenode-cluster.joined")
    active = "active" in cluster_roles
    standby = "standby" in cluster_roles
    healthy = active and standby
    quorum = is_state("journalnode.quorum")
    failover = "automatic" if is_state("zookeeper.ready") else "manual"
    degraded = ha and not all([clustered, quorum, healthy])
    if not ha:
        extra = "standalone" if started else "down"
    else:
        if chosen:
            role = utils.ha_node_state(local_hostname) or "down"
        else:
            # units beyond the chosen pair just wait on the sidelines
            role = "extra"
        if not degraded:
            extra = "HA {}, with {} fail-over".format(role, failover)
        else:
            # list only the pieces that are actually missing
            missing = " and ".join(
                filter(
                    None,
                    [
                        "NameNode" if not clustered else None,
                        "JournalNodes" if not quorum else None,
                        "active" if not active else None,
                        "standby" if not standby else None,
                    ],
                )
            )
            extra = "HA degraded {} (missing: {}), with {} fail-over".format(
                role, missing, failover)
    hookenv.status_set(
        "active",
        "Ready ({count} DataNode{s}, {extra})".format(
            count=num_slaves,
            s="s" if num_slaves != 1 else "",  # was `> 1`: "0 DataNode" bug
            extra=extra,
        ),
    )
def init_ha_standby(datanode, cluster):
    """Once initial HA setup is done, any new NameNode is started as standby.

    Bootstraps this unit as a standby NameNode, starts it, signals the
    cluster peers, and opens the service ports.

    Fix: the original called ``update_ha_config(datanode)`` twice
    back-to-back; the redundant second call has been removed.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't even bootstrapStandby if not in the list of chosen nodes
        return
    update_ha_config(datanode)  # ensure the config is written
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.bootstrap_standby()
    hdfs.start_namenode()
    cluster.standby_ready()
    set_state('namenode.standby')
    hadoop.open_ports('namenode')
    set_state('namenode.started')
def check_cluster_nodes(cluster, datanode):
    """Replace departed chosen cluster nodes with viable newcomers.

    A chosen node is dropped only when it has left the peer relation
    *and* a working replacement exists; this keeps reboots and
    intermittent node loss from causing superfluous updates.
    """
    this_host = hookenv.local_unit().replace('/', '-')
    manage_cluster_hosts(cluster)  # ensure /etc/hosts is up-to-date
    chosen = set(get_cluster_nodes())
    present = {this_host} | set(cluster.nodes())
    survivors = chosen & present
    newcomers = present - chosen
    if len(survivors) < 2 and newcomers:
        # top up the chosen pair, preferring nodes that are still around
        replacements = (sorted(survivors) + sorted(newcomers))[:2]
        set_cluster_nodes(replacements)
        update_ha_config(datanode)  # ensure new config gets written
def update_clients(clients):
    """Re-send the current chosen NameNodes to related clients."""
    namenodes = get_cluster_nodes()
    clients.send_namenodes(namenodes)
def update_nodes(datanode):
    """Re-send the current chosen NameNodes to related DataNodes."""
    namenodes = get_cluster_nodes()
    datanode.send_namenodes(namenodes)