def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')
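These snippets are charms.reactive handlers from Juju Hadoop charms. For context, a minimal sketch of how a handler like configure_hdfs is typically wired up, assuming the standard charms.reactive decorators (the gating state names here are illustrative, not taken from the snippet above):

# Illustrative wiring only; 'hadoop.installed' and the exact states are
# assumptions about the surrounding charm, not shown in these examples.
from charms.reactive import when, when_not

@when('hadoop.installed', 'namenode.ready')
@when_not('hadoop.hdfs.configured')
def configure_hdfs(namenode):
    # The framework injects the interface instance (namenode) once the
    # interface layer sets the 'namenode.ready' state.
    ...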
Example #2
def send_nm_all_info(nodemanager):
    """Send nodemanagers all mapred-slave relation data.

    At this point, the resourcemanager is ready to serve nodemanagers. Send all
    mapred-slave relation data so that our 'resourcemanager.ready' state becomes set.
    """
    bigtop = Bigtop()
    rm_host = get_fqdn()
    rm_ipc = get_layer_opts().port('resourcemanager')
    jh_ipc = get_layer_opts().port('jobhistory')
    jh_http = get_layer_opts().port('jh_webapp_http')

    nodemanager.send_resourcemanagers([rm_host])
    nodemanager.send_spec(bigtop.spec())
    nodemanager.send_ports(rm_ipc, jh_http, jh_ipc)

    # hosts_map and ssh_key are required by the mapred-slave interface to signify
    # RM's readiness. Send them, even though they are not utilized by bigtop.
    # NB: update KV hosts with all nodemanagers prior to sending the hosts_map
    # because mapred-slave gates readiness on a NM's presence in the hosts_map.
    utils.update_kv_hosts(nodemanager.hosts_map())
    nodemanager.send_hosts_map(utils.get_kv_hosts())
    nodemanager.send_ssh_key('invalid')

    # update status with slave count and report ready for yarn
    num_slaves = len(nodemanager.nodes())
    hookenv.status_set('active', 'ready ({count} nodemanager{s})'.format(
        count=num_slaves,
        s='s' if num_slaves > 1 else '',
    ))
    set_state('apache-bigtop-resourcemanager.ready')
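The send_* calls above are interface-layer helpers; under charms.reactive conventions they generally just publish values onto the relation. A hedged sketch of what one might look like on the interface side (the class name and method body are assumptions; the real implementation lives in the mapred-slave interface layer):

import json

from charms.reactive import RelationBase, scopes


class ResourceManagerProvides(RelationBase):
    # Hypothetical interface class; names are illustrative.
    scope = scopes.GLOBAL

    def send_resourcemanagers(self, hosts):
        # Publish the resourcemanager host list on every conversation.
        for conv in self.conversations():
            conv.set_remote('resourcemanagers', json.dumps(hosts))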
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace("/", "-")
    hdfs_port = hadoop.dist_config.port("namenode")
    webhdfs_port = hadoop.dist_config.port("nn_webapp_http")

    utils.update_kv_hosts({node["ip"]: node["host"] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key("hdfs"))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node["host"] for node in datanode.nodes()]
    if data_changed("namenode.slaves", slaves):
        unitdata.kv().set("namenode.slaves", slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        "active", "Ready ({count} DataNode{s})".format(count=len(slaves), s="s" if len(slaves) > 1 else "")
    )
    set_state("namenode.ready")
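The data_changed guard above comes from charms.reactive.helpers: it hashes the given value, compares it against the hash stored under that key in the unit's KV store, records the new hash, and reports whether anything changed. That is what keeps register_slaves from re-running on every hook invocation. A minimal sketch of the pattern in isolation (host names are illustrative):

from charms.reactive.helpers import data_changed

slaves = ['datanode-0', 'datanode-1']  # illustrative host list
if data_changed('namenode.slaves', slaves):
    pass  # first call with this value returns True: re-register slaves
if data_changed('namenode.slaves', slaves):
    pass  # same value again returns False: skip the expensive path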
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump({
            'relation_name': resourcemanager.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in resourcemanager.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(
                    resourcemanager.relation_name
                )
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
Example #5
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host']
         for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        'active', 'Ready ({count} DataNode{s})'.format(
            count=len(slaves),
            s='s' if len(slaves) > 1 else '',
        ))
    set_state('namenode.ready')
def send_info(datanode):
    hadoop = get_bigtop_base()
    # hdfs = HDFS(hadoop)
    # local_hostname = hookenv.local_unit().replace('/', '-')
    # hdfs_port = hadoop.dist_config.port('namenode')
    # webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    # datanode.send_spec(hadoop.spec())
    # datanode.send_namenodes([local_hostname])
    # datanode.send_ports(hdfs_port, webhdfs_port)
    # datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    # slaves = [node['host'] for node in datanode.nodes()]
    # if data_changed('namenode.slaves', slaves):
    #     unitdata.kv().set('namenode.slaves', slaves)
    #     hdfs.register_slaves(slaves)

    # hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
    #     count=len(slaves),
    #     s='s' if len(slaves) > 1 else '',
    # ))
    set_state('namenode.ready')
    hookenv.status_set('active', 'ready')
Example #7
def send_dn_all_info(datanode):
    """Send datanodes all dfs-slave relation data.

    At this point, the namenode is ready to serve datanodes. Send all
    dfs-slave relation data so that our 'namenode.ready' state becomes set.
    """
    bigtop = Bigtop()
    fqdn = get_fqdn()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    datanode.send_spec(bigtop.spec())
    datanode.send_namenodes([fqdn])
    datanode.send_ports(hdfs_port, webhdfs_port)

    # hosts_map, ssh_key, and clustername are required by the dfs-slave
    # interface to signify NN's readiness. Send them, even though they are not
    # utilized by bigtop.
    # NB: update KV hosts with all datanodes prior to sending the hosts_map
    # because dfs-slave gates readiness on a DN's presence in the hosts_map.
    utils.update_kv_hosts(datanode.hosts_map())
    datanode.send_hosts_map(utils.get_kv_hosts())
    datanode.send_ssh_key('invalid')
    datanode.send_clustername(hookenv.service_name())

    # update status with slave count and report ready for hdfs
    num_slaves = len(datanode.nodes())
    hookenv.status_set(
        'active', 'ready ({count} datanode{s})'.format(
            count=num_slaves,
            s='s' if num_slaves > 1 else '',
        ))
    set_state('apache-bigtop-namenode.ready')
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump(
            {
                'relation_name': resourcemanager.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in resourcemanager.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    }
                    for rid in hookenv.relation_ids(
                        resourcemanager.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(resourcemanager.resourcemanagers()[0],
                             resourcemanager.port(),
                             resourcemanager.hs_http(),
                             resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump(
            {
                'relation_name': namenode.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in namenode.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    }
                    for rid in hookenv.relation_ids(namenode.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.refresh_slaves()

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts({node['ip']: node['host'] for node in nodemanager.nodes()})
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('hdfs'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in nodemanager.nodes()]
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
Example #13
def send_dn_all_info(datanode):
    """Send datanodes all dfs-slave relation data.

    At this point, the namenode is ready to serve datanodes. Send all
    dfs-slave relation data so that our 'namenode.ready' state becomes set.
    """
    bigtop = Bigtop()
    fqdn = get_fqdn()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    datanode.send_spec(bigtop.spec())
    datanode.send_namenodes([fqdn])
    datanode.send_ports(hdfs_port, webhdfs_port)

    # hosts_map, ssh_key, and clustername are required by the dfs-slave
    # interface to signify NN's readiness. Send them, even though they are not
    # utilized by bigtop.
    # NB: update KV hosts with all datanodes prior to sending the hosts_map
    # because dfs-slave gates readiness on a DN's presence in the hosts_map.
    utils.update_kv_hosts(datanode.hosts_map())
    datanode.send_hosts_map(utils.get_kv_hosts())
    datanode.send_ssh_key('invalid')
    datanode.send_clustername(hookenv.service_name())

    # update status with slave count and report ready for hdfs
    num_slaves = len(datanode.nodes())
    hookenv.status_set('active', 'ready ({count} datanode{s})'.format(
        count=num_slaves,
        s='s' if num_slaves > 1 else '',
    ))
    set_state('apache-bigtop-namenode.ready')
def configure_resourcemanager():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_resourcemanager()
    yarn.configure_jobhistory()
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('resourcemanager.configured')
def start_datanode(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_datanode(namenode.namenodes()[0], namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs.start_datanode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
def configure_namenode():
    local_hostname = hookenv.local_unit().replace("/", "-")
    private_address = hookenv.unit_get("private-address")
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports("namenode")
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state("namenode.started")
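Formatting a NameNode is destructive if repeated, so charms typically want the format step to run only once. A hedged sketch of one way to guard it, assuming charmhelpers' unitdata; the 'namenode.formatted' key is hypothetical, not part of the charm code above (implementations may equally well make format_namenode itself a no-op on reformat):

from charmhelpers.core import unitdata


def format_namenode_once(hdfs):
    # Hypothetical first-run guard tracked in the unit-local KV store.
    kv = unitdata.kv()
    if not kv.get('namenode.formatted'):
        hdfs.format_namenode()
        kv.set('namenode.formatted', True)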
Example #19
def configure_namenode():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(resourcemanager.resourcemanagers()[0],
                               resourcemanager.port(),
                               resourcemanager.hs_http(),
                               resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
def configure_ha(cluster, datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    cluster_nodes = cluster.nodes()
    jn_nodes = datanode.nodes()
    jn_port = datanode.jn_port()
    if data_changed('namenode.ha', [cluster_nodes, jn_nodes, jn_port]):
        utils.update_kv_hosts(cluster.hosts_map())
        utils.manage_etc_hosts()
        hdfs.register_journalnodes(jn_nodes, jn_port)
        hdfs.restart_namenode()
        datanode.send_namenodes(cluster_nodes)
        if not is_state('namenode.shared-edits.init'):
            hdfs.init_sharededits()
            set_state('namenode.shared-edits.init')
Example #22
    def configure_hosts_file(self):
        """
        Add the unit's private-address to /etc/hosts to ensure that Java
        can resolve the hostname of the server to its real IP address.
        We derive our hostname from the unit_id, replacing / with -.
        """
        local_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        hostname = hookenv.local_unit().replace('/', '-')
        utils.update_kv_hosts({local_ip: hostname})
        utils.manage_etc_hosts()

        # update name of host to more semantically meaningful value
        # (this is required on some providers; the /etc/hosts entry must match
        # the /etc/hostname lest Hadoop get confused about where certain things
        # should be run)
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
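A quick way to sanity-check this wiring (illustrative names, not from the charm): after update_kv_hosts and manage_etc_hosts run, a hostname lookup should hit the /etc/hosts entry written above, which is exactly the resolution path Java relies on.

import socket

# Illustrative check only; 'namenode-0' is an example unit hostname.
print(socket.gethostbyname('namenode-0'))  # expect the unit's private IP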
def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        if is_state('datanode.started'):  # re-check, since this may be a manual call
            hdfs.restart_datanode()
            hdfs.restart_journalnode()

    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())
def manage_cluster_hosts(cluster):
    utils.update_kv_hosts(cluster.hosts_map())
    utils.manage_etc_hosts()
def manage_datanode_hosts(datanode):
    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()
    datanode.send_hosts_map(utils.get_kv_hosts())
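Every example above funnels host records through utils.update_kv_hosts / utils.get_kv_hosts before rendering /etc/hosts via utils.manage_etc_hosts. A rough sketch of the assumed contract, using charmhelpers' unitdata as the backing store (the 'etc_hosts' key and the rendering details are assumptions about the utils layer, not verified against it):

from charmhelpers.core import unitdata


def update_kv_hosts(hosts_map):
    # Assumed behavior: merge {ip: hostname} pairs into the unit KV store.
    kv = unitdata.kv()
    hosts = kv.get('etc_hosts', {})
    hosts.update(hosts_map)
    kv.set('etc_hosts', hosts)


def get_kv_hosts():
    return unitdata.kv().get('etc_hosts', {})


def manage_etc_hosts():
    # Assumed behavior: render the stored map as /etc/hosts entries.
    # (A real implementation would rewrite /etc/hosts in place.)
    for ip, host in sorted(get_kv_hosts().items()):
        print('{}\t{}'.format(ip, host))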