# Imports assumed from the surrounding charm layers (exact module paths
# are assumptions based on the charmhelpers / charms.reactive /
# jujubigdata conventions these handlers rely on):
import yaml

from charmhelpers.core import hookenv, unitdata
from charms.reactive import is_state, remove_state, set_state
from charms.reactive.helpers import data_changed
from charms.layer.hadoop_base import get_hadoop_base
from jujubigdata import utils
from jujubigdata.handlers import HDFS, YARN


def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # The relation has not published any namenode hosts yet, which
        # makes the indexing below fail; log the raw relation state first
        # so the failure can be debugged from the unit log.
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                # Conversation.serialize is a classmethod, hence the
                # explicit conv argument.
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                }
                for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')
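# Illustration only: the debug dump above logs the raw relation state in
# roughly this shape (the relation ids, unit names, and conversation keys
# here are invented for illustration, not taken from this charm):
#
#   relation_name: namenode
#   conversations:
#     reactive.conversations.namenode.global:
#       relation_ids: ['namenode:1']
#       ...
#   relation_data:
#     namenode:1:
#       namenode/0: {private-address: 10.0.0.5, ...}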
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    # Derive a hostname Juju-style: unit 'namenode/0' -> 'namenode-0'.
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    # Publish everything the datanodes need to join the cluster.
    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    # Only rewrite the slaves registration when the set of datanodes
    # actually changes.
    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')
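# Illustration only: the data_changed() guard in send_info() avoids
# re-registering slaves on every hook invocation. A minimal, self-contained
# version of that pattern looks like the sketch below; the real helper is
# charms.reactive.helpers.data_changed, which persists its hashes in
# unitdata rather than in a module-level dict.

import hashlib
import json

_seen_hashes = {}  # stand-in for the persistent unitdata key-value store


def _data_changed_sketch(key, value):
    """Return True the first time, or whenever `value` differs from the
    last value recorded under `key`."""
    digest = hashlib.md5(
        json.dumps(value, sort_keys=True).encode('utf8')).hexdigest()
    changed = _seen_hashes.get(key) != digest
    _seen_hashes[key] = digest
    return changed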
def configure_namenode():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)

    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')
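# Illustration only: resolve_private_address() normalizes Juju's
# private-address, which some providers report as a hostname rather than
# an IP. A minimal stand-in using only the standard library (the real
# implementation lives in jujubigdata.utils) might look like:

import socket


def _resolve_private_address_sketch(addr):
    """Return `addr` unchanged if it is already a dotted-quad IPv4
    address, otherwise resolve it via DNS."""
    try:
        socket.inet_aton(addr)  # raises OSError for non-IPv4 strings
        return addr
    except OSError:
        return socket.gethostbyname(addr)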
def ganglia_changed():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    yarn = YARN(hadoop)
    if is_state('namenode.started'):
        hdfs.restart_namenode()
    if is_state('datanode.started'):
        hdfs.restart_datanode()
    if is_state('journalnode.started'):
        hdfs.restart_journalnode()
    if is_state('resourcemanager.started'):
        yarn.restart_resourcemanager()
    if is_state('nodemanager.started'):
        yarn.restart_nodemanager()
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    # Drop the leaving slaves and re-register the remainder.
    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        # No datanodes left; surface that in the unit status and clear
        # the ready state until a new relation arrives.
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()
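# Note: the set difference above removes every leaving slave in one pass
# but does not preserve the original registration order, e.g.
#
#   >>> sorted(set(['dn-0', 'dn-1', 'dn-2']) - set(['dn-1']))
#   ['dn-0', 'dn-2']
#
# register_slaves() is handed the full remaining list each time, so the
# ordering loss should be harmless here.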