def start_resourcemanager(hdfs_rel):
    # Start YARN's ResourceManager and JobHistory server and open the
    # ResourceManager ports.
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.start_resourcemanager()
    yarn.start_jobhistory()
    hadoop.open_ports('resourcemanager')
    set_state('resourcemanager.started')
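These handlers are excerpted from Juju reactive charms for Apache Hadoop built on the jujubigdata library; their @when/@when_not decorators and module-level imports are not shown. A minimal sketch of the imports the snippets rely on follows (module paths vary between charm layers, so treat them as assumptions rather than the charms' exact headers):

# Sketch of the shared imports assumed by the examples below; paths are assumptions.
from operator import itemgetter

import yaml

from charmhelpers.core import hookenv, unitdata
from charms.reactive import set_state, remove_state, is_state
from charms.reactive.helpers import data_changed
from jujubigdata import utils
from jujubigdata.handlers import HDFS, YARN

from charms.layer.hadoop_base import get_hadoop_base  # assumed layer path

# get_cluster_nodes(), set_cluster_nodes(), update_config(), update_zk_config()
# and update_ha_config() are helpers defined in the same handler modules, and
# `leadership` refers to the leadership charm layer used by the HA handlers.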
Example No. 2
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')
Example No. 3
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host']
         for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        'active', 'Ready ({count} DataNode{s})'.format(
            count=len(slaves),
            s='s' if len(slaves) > 1 else '',
        ))
    set_state('namenode.ready')
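The data_changed('namenode.slaves', slaves) guard above (also used by the update_slaves and unregister_* handlers further down) is the standard charms.reactive idiom for skipping work when nothing changed: it hashes the value, compares it with the hash stored under that key in the unit's local kv store, updates the stored hash, and returns True only when the value differs from the previous hook run. A minimal standalone sketch, with an arbitrary key and a hypothetical host list:

from charms.reactive.helpers import data_changed

slaves = ['slave-0', 'slave-1']  # hypothetical host list
if data_changed('namenode.slaves', slaves):
    # True on the first call and whenever the list changes between hooks,
    # so expensive re-registration only happens when actually needed.
    print('slave list changed; re-registering slaves')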
Example No. 4
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump(
            {
                'relation_name': resourcemanager.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in resourcemanager.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    }
                    for rid in hookenv.relation_ids(
                        resourcemanager.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(resourcemanager.resourcemanagers()[0],
                             resourcemanager.port(),
                             resourcemanager.hs_http(),
                             resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
Example No. 5
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump(
            {
                'relation_name': namenode.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in namenode.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    }
                    for rid in hookenv.relation_ids(namenode.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
Example No. 7
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump({
            'relation_name': resourcemanager.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in resourcemanager.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(
                    resourcemanager.relation_name
                )
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
Example No. 8
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')
def start_datanode(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    update_config(namenode)  # force config update
    hdfs.start_datanode()
    hdfs.start_journalnode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')
def update_zk_config(zookeeper):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    zk_nodes = sorted(zookeeper.zookeepers(), key=itemgetter('host'))
    zk_started = is_state('namenode.zk.started')
    hdfs.configure_zookeeper(zk_nodes)
    if zk_started and data_changed('namenode.zk', zk_nodes):
        hdfs.restart_zookeeper()
def hdfs_departed():
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    hadoop.close_ports('resourcemanager')
    yarn.stop_jobhistory()
    yarn.stop_resourcemanager()
    remove_state('resourcemanager.started')
    remove_state('resourcemanager.ready')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    datanode.send_spec(hadoop.spec())
    datanode.send_clustername(hookenv.service_name())
    datanode.send_namenodes(get_cluster_nodes())
    datanode.send_ports(hdfs_port, webhdfs_port)
def configure_resourcemanager():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_resourcemanager()
    yarn.configure_jobhistory()
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('resourcemanager.configured')
def update_slaves(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.reload_slaves()

    set_state('namenode.ready')
Example No. 15
def accept_clients(clients):
    hadoop = get_hadoop_base()
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    clients.send_spec(hadoop.spec())
    clients.send_namenodes([local_hostname])
    clients.send_ports(hdfs_port, webhdfs_port)
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)
def configure_namenode():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(get_cluster_nodes())
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')
def accept_clients(clients):
    hadoop = get_hadoop_base()
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    clients.send_spec(hadoop.spec())
    clients.send_clustername(hookenv.service_name())
    clients.send_namenodes(get_cluster_nodes())
    clients.send_ports(hdfs_port, webhdfs_port)
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)
def start_zookeeper(zookeeper):
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't run zkfc on a non-cluster node
        return
    update_zk_config(zookeeper)  # ensure config is up to date
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.restart_namenode()
    hdfs.start_zookeeper()
    set_state('namenode.zk.started')
def accept_clients(clients):
    hadoop = get_hadoop_base()
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    clients.send_spec(hadoop.spec())
    clients.send_resourcemanagers([local_hostname])
    clients.send_ports(port, hs_http, hs_ipc)
    clients.send_hosts_map(utils.get_kv_hosts())
    clients.send_ready(True)
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
Example No. 22
def configure_namenode():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(resourcemanager.resourcemanagers()[0],
                               resourcemanager.port(),
                               resourcemanager.hs_http(),
                               resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
def ganglia_changed():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    yarn = YARN(hadoop)
    if is_state('namenode.started'):
        hdfs.restart_namenode()
    if is_state('datanode.started'):
        hdfs.restart_datanode()
    if is_state('journalnode.started'):
        hdfs.restart_journalnode()
    if is_state('resourcemanager.started'):
        yarn.restart_resourcemanager()
    if is_state('nodemanager.started'):
        yarn.restart_nodemanager()
def init_ha_active(datanode, cluster):
    """
    Do initial HA setup on the leader.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_namenode()
    remove_state('namenode.started')
    # initial cluster is us (active) plus a standby
    set_cluster_nodes([local_hostname, cluster.nodes()[0]])
    update_ha_config(datanode)
    hdfs.init_sharededits()
    hdfs.start_namenode()
    leadership.leader_set({'ha-initialized': 'true'})
    set_state('namenode.started')
def update_ha_config(datanode):
    cluster_nodes = get_cluster_nodes()
    jn_nodes = sorted(datanode.nodes())
    jn_port = datanode.jn_port()
    started = is_state('namenode.started')
    new_cluster_config = data_changed('namenode.cluster-nodes', cluster_nodes)
    new_jn_config = data_changed('namenode.jn.config', (jn_nodes, jn_port))

    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(cluster_nodes)
    hdfs.register_journalnodes(jn_nodes, jn_port)

    if started and new_cluster_config:
        hdfs.restart_namenode()
    elif started and new_jn_config:
        hdfs.reload_slaves()  # is this actually necessary?
def init_ha_standby(datanode, cluster):
    """
    Once initial HA setup is done, any new NameNode is started as standby.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't even bootstrapStandby if not in the list of chosen nodes
        return
    update_ha_config(datanode)  # ensure the config is written
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.bootstrap_standby()
    hdfs.start_namenode()
    cluster.standby_ready()
    set_state('namenode.standby')
    hadoop.open_ports('namenode')
    set_state('namenode.started')
def unregister_nodemanager(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)

    slaves = unitdata.kv().get('resourcemanager.slaves', [])
    slaves_leaving = nodemanager.nodes()
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('resourcemanager.slaves', slaves_remaining)
    yarn.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('resourcemanager.ready')

    nodemanager.dismiss()
def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        if is_state('datanode.started'):  # re-check, since this may be called manually outside the handler
            hdfs.restart_datanode()
            hdfs.restart_journalnode()

    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)
    hdfs.reload_slaves()

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('namenode.ready')

    datanode.dismiss()
Example No. 33
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()
Example No. 35
def handle_legacy_installed_flag():
    hadoop = get_hadoop_base()
    if hadoop.is_installed():
        set_state('hadoop.installed')
Example No. 36
def set_yarn_spec(resourcemanager):
    hadoop = get_hadoop_base()
    resourcemanager.set_local_spec(hadoop.spec())
Example No. 37
def set_hdfs_spec(namenode):
    hadoop = get_hadoop_base()
    namenode.set_local_spec(hadoop.spec())
Example No. 38
def install_hadoop():
    hadoop = get_hadoop_base()
    hadoop.install()
    set_state('hadoop.installed')
def stop_nodemanager():
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.stop_nodemanager()
    hadoop.close_ports('nodemanager')
    remove_state('nodemanager.started')
def stop_zookeeper():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.stop_zookeeper()
    remove_state('namenode.zk.started')
def format_zookeeper(zookeeper):
    update_zk_config(zookeeper)  # ensure config is up to date
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.format_zookeeper()
    leadership.leader_set({'zk-formatted': 'true'})
Example No. 43
def fetch_resources():
    hadoop = get_hadoop_base()
    if hadoop.verify_resources():
        set_state('resources.available')