def install_resourcemanager(namenode):
    """Install the resourcemanager once the namenode has sent its FQDN.

    The namenode FQDN is the only piece of data the RM install needs, so
    namenodes() is polled whenever a namenode relation exists. This lets the
    install begin as early as possible, even if 'namenode.ready' is not set.
    """
    # Nothing to do until the namenode has published its FQDN.
    if not namenode.namenodes():
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing resourcemanager')
    cluster_hosts = {
        'namenode': namenode.namenodes()[0],
        'resourcemanager': get_fqdn(),
    }
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts=cluster_hosts, roles='resourcemanager')
    bigtop.trigger_puppet()

    # Bigtop does not currently consume the /etc/hosts entries kept in the
    # KV, but some interfaces (eg: mapred-slave) require a hosts_map
    # attribute before they treat the RM as ready. Record our RM info in the
    # KV to satisfy that requirement.
    utils.initialize_kv_host()

    # Put our ubuntu user in the hadoop and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-resourcemanager.installed')
    hookenv.status_set('maintenance', 'resourcemanager installed')
Esempio n. 2
0
def send_client_all_info(client):
    """Publish every piece of dfs relation data to clients.

    Clients here are the plugin, the RM, and other non-datanodes. By now the
    namenode can serve them, so the full set of dfs relation data is sent to
    let their 'namenode.ready' state become set.
    """
    bigtop = Bigtop()
    nn_fqdn = get_fqdn()
    # Look up the IPC port first, then the webhdfs port.
    nn_ports = [
        get_layer_opts().port(port_name)
        for port_name in ('namenode', 'nn_webapp_http')
    ]

    client.send_spec(bigtop.spec())
    client.send_namenodes([nn_fqdn])
    client.send_ports(*nn_ports)

    # Our namenode.ready state implies at least 1 datanode is present, i.e.
    # hdfs is usable. Relay exactly that to clients through send_ready().
    hdfs_ready = bool(is_state('apache-bigtop-namenode.ready'))
    client.send_ready(hdfs_ready)

    # The dfs interface needs hosts_map and clustername before it signals
    # NN readiness. Send both, even though bigtop does not use them.
    kv_hosts = utils.get_kv_hosts()
    client.send_hosts_map(kv_hosts)
    client.send_clustername(hookenv.service_name())
Esempio n. 3
0
def send_dn_all_info(datanode):
    """Send datanodes all dfs-slave relation data.

    At this point, the namenode is ready to serve datanodes. Send all
    dfs-slave relation data so that our 'namenode.ready' state becomes set,
    then report the datanode count in the unit status.

    The ``datanode`` argument is the dfs-slave relation object; it is used
    both to publish our data (send_* calls) and to read the related
    datanodes (hosts_map() and nodes()).
    """
    bigtop = Bigtop()
    fqdn = get_fqdn()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    datanode.send_spec(bigtop.spec())
    datanode.send_namenodes([fqdn])
    datanode.send_ports(hdfs_port, webhdfs_port)

    # hosts_map, ssh_key, and clustername are required by the dfs-slave
    # interface to signify NN's readiness. Send them, even though they are not
    # utilized by bigtop (hence the placeholder ssh key value).
    # NB: update KV hosts with all datanodes prior to sending the hosts_map
    # because dfs-slave gates readiness on a DN's presence in the hosts_map.
    utils.update_kv_hosts(datanode.hosts_map())
    datanode.send_hosts_map(utils.get_kv_hosts())
    datanode.send_ssh_key('invalid')
    datanode.send_clustername(hookenv.service_name())

    # Update status with the slave count and report ready for hdfs.
    # Pluralize for every count except exactly one, so that a count of zero
    # reads "0 datanodes" rather than "0 datanode".
    num_slaves = len(datanode.nodes())
    hookenv.status_set('active', 'ready ({count} datanode{s})'.format(
        count=num_slaves,
        s='' if num_slaves == 1 else 's',
    ))
    set_state('apache-bigtop-namenode.ready')
Esempio n. 4
0
def send_client_all_info(client):
    """Hand clients (plugin, RM, non-DNs) the complete dfs relation data.

    The namenode is able to serve clients at this point, so all dfs relation
    data is sent, which allows their 'namenode.ready' state to become set.
    """
    distro = Bigtop()
    namenode_host = get_fqdn()
    ipc_port = get_layer_opts().port('namenode')
    web_port = get_layer_opts().port('nn_webapp_http')

    client.send_spec(distro.spec())
    client.send_namenodes([namenode_host])
    client.send_ports(ipc_port, web_port)

    # namenode.ready is only set once we have at least 1 datanode, meaning
    # hdfs can be used. Tell clients whether that is the case.
    client.send_ready(
        True if is_state('apache-bigtop-namenode.ready') else False)

    # hosts_map and clustername are not used by bigtop, but the dfs
    # interface requires them to signify NN's readiness, so send them anyway.
    hosts_map = utils.get_kv_hosts()
    client.send_hosts_map(hosts_map)
    cluster_name = hookenv.service_name()
    client.send_clustername(cluster_name)
Esempio n. 5
0
def install_resourcemanager(namenode):
    """Run the RM install as soon as the namenode FQDN is available.

    Installing the RM requires nothing but the namenode FQDN, so we check
    namenodes() any time a namenode relation is present. Doing so allows the
    install to happen asap, without waiting for 'namenode.ready'.
    """
    fqdn_available = bool(namenode.namenodes())
    if not fqdn_available:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing resourcemanager')
    namenode_fqdn = namenode.namenodes()[0]
    local_fqdn = get_fqdn()
    distro = Bigtop()
    distro.render_site_yaml(
        hosts={'namenode': namenode_fqdn, 'resourcemanager': local_fqdn},
        roles='resourcemanager',
    )
    distro.trigger_puppet()

    # Some interfaces (eg: mapred-slave) look for a hosts_map attribute to
    # decide whether the RM is ready, even though bigtop does not currently
    # use the /etc/hosts entries stored in the KV. Store our RM info in the
    # KV so that requirement is met.
    utils.initialize_kv_host()

    # Add the ubuntu user to the hadoop and mapred groups.
    layer_opts = get_layer_opts()
    layer_opts.add_users()

    set_state('apache-bigtop-resourcemanager.installed')
    hookenv.status_set('maintenance', 'resourcemanager installed')
Esempio n. 6
0
def send_nm_all_info(nodemanager):
    """Send nodemanagers all mapred-slave relation data.

    At this point, the resourcemanager is ready to serve nodemanagers. Send all
    mapred-slave relation data so that our 'resourcemanager.ready' state
    becomes set, then report the nodemanager count in the unit status.

    The ``nodemanager`` argument is the mapred-slave relation object; it is
    used both to publish our data (send_* calls) and to read the related
    nodemanagers (hosts_map() and nodes()).
    """
    bigtop = Bigtop()
    rm_host = get_fqdn()
    rm_ipc = get_layer_opts().port('resourcemanager')
    jh_ipc = get_layer_opts().port('jobhistory')
    jh_http = get_layer_opts().port('jh_webapp_http')

    nodemanager.send_resourcemanagers([rm_host])
    nodemanager.send_spec(bigtop.spec())
    # NB: send_ports takes the jobhistory http port before the ipc port.
    nodemanager.send_ports(rm_ipc, jh_http, jh_ipc)

    # hosts_map and ssh_key are required by the mapred-slave interface to signify
    # RM's readiness. Send them, even though they are not utilized by bigtop
    # (hence the placeholder ssh key value).
    # NB: update KV hosts with all nodemanagers prior to sending the hosts_map
    # because mapred-slave gates readiness on a NM's presence in the hosts_map.
    utils.update_kv_hosts(nodemanager.hosts_map())
    nodemanager.send_hosts_map(utils.get_kv_hosts())
    nodemanager.send_ssh_key('invalid')

    # Update status with the slave count and report ready for yarn.
    # Pluralize for every count except exactly one, so that a count of zero
    # reads "0 nodemanagers" rather than "0 nodemanager".
    num_slaves = len(nodemanager.nodes())
    hookenv.status_set('active', 'ready ({count} nodemanager{s})'.format(
        count=num_slaves,
        s='' if num_slaves == 1 else 's',
    ))
    set_state('apache-bigtop-resourcemanager.ready')
Esempio n. 7
0
def send_client_all_info(client):
    """Publish every piece of mapred relation data to clients.

    By now the resourcemanager can serve clients, so the full set of mapred
    relation data is sent to let their 'resourcemanager.ready' state become
    set.
    """
    distro = Bigtop()
    resourcemanager_host = get_fqdn()
    rm_ipc_port = get_layer_opts().port('resourcemanager')
    jobhistory_ipc_port = get_layer_opts().port('jobhistory')
    jobhistory_http_port = get_layer_opts().port('jh_webapp_http')

    client.send_resourcemanagers([resourcemanager_host])
    client.send_spec(distro.spec())
    # NB: send_ports takes the jobhistory http port before the ipc port.
    client.send_ports(rm_ipc_port, jobhistory_http_port, jobhistory_ipc_port)

    # Our resourcemanager.ready state implies at least 1 nodemanager is
    # present, i.e. yarn is usable. Relay that to clients via send_ready().
    yarn_ready = bool(is_state('apache-bigtop-resourcemanager.ready'))
    client.send_ready(yarn_ready)

    # The mapred interface needs hosts_map before it signals RM readiness.
    # Send it, even though bigtop does not use it.
    kv_hosts = utils.get_kv_hosts()
    client.send_hosts_map(kv_hosts)
def install_namenode():
    """Install the namenode via bigtop and make it listen on all interfaces.

    Renders the bigtop site yaml for the 'namenode' role, triggers puppet,
    records this host in the KV, rewrites the bind-host properties in
    hdfs-site.xml, adds the required users, and finally sets the
    'apache-bigtop-namenode.installed' state.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    distro = Bigtop()
    distro.render_site_yaml(hosts={'namenode': get_fqdn()}, roles='namenode')
    distro.trigger_puppet()

    # Bigtop does not currently consume the /etc/hosts entries kept in the
    # KV, but some interfaces (eg: dfs-slave) require a hosts_map attribute
    # before they treat the NN as ready. Record our NN info in the KV to
    # satisfy that requirement.
    utils.initialize_kv_host()

    # Bind every namenode endpoint to all interfaces.
    site_xml = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(site_xml) as props:
        for bind_key in (
                'dfs.namenode.rpc-bind-host',
                'dfs.namenode.servicerpc-bind-host',
                'dfs.namenode.http-bind-host',
                'dfs.namenode.https-bind-host',
        ):
            props[bind_key] = '0.0.0.0'

    # hadoop-mapreduce is not installed here, so the 'mapred' user/group has
    # to be created explicitly; the namenode needs it to access yarn job
    # history files in hdfs. The ubuntu user is also added to the hadoop and
    # mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Esempio n. 9
0
def send_dn_all_info(datanode):
    """Send datanodes all dfs-slave relation data.

    At this point, the namenode is ready to serve datanodes. Send all
    dfs-slave relation data so that our 'namenode.ready' state becomes set,
    then report the datanode count in the unit status.

    The ``datanode`` argument is the dfs-slave relation object; it is used
    both to publish our data (send_* calls) and to read the related
    datanodes (hosts_map() and nodes()).
    """
    bigtop = Bigtop()
    fqdn = get_fqdn()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    datanode.send_spec(bigtop.spec())
    datanode.send_namenodes([fqdn])
    datanode.send_ports(hdfs_port, webhdfs_port)

    # hosts_map, ssh_key, and clustername are required by the dfs-slave
    # interface to signify NN's readiness. Send them, even though they are not
    # utilized by bigtop (hence the placeholder ssh key value).
    # NB: update KV hosts with all datanodes prior to sending the hosts_map
    # because dfs-slave gates readiness on a DN's presence in the hosts_map.
    utils.update_kv_hosts(datanode.hosts_map())
    datanode.send_hosts_map(utils.get_kv_hosts())
    datanode.send_ssh_key('invalid')
    datanode.send_clustername(hookenv.service_name())

    # Update status with the slave count and report ready for hdfs.
    # Pluralize for every count except exactly one, so that a count of zero
    # reads "0 datanodes" rather than "0 datanode".
    num_slaves = len(datanode.nodes())
    hookenv.status_set(
        'active', 'ready ({count} datanode{s})'.format(
            count=num_slaves,
            s='' if num_slaves == 1 else 's',
        ))
    set_state('apache-bigtop-namenode.ready')
    def test_get_fqdn(self, mock_run):
        '''
        Check that get_fqdn() returns the fqdn with surrounding spaces removed.

        Note: utils.run_as returns utf-8 decoded strings.
        '''
        # Bare value, trailing pad, leading pad, and padding on both sides.
        padded_values = ('foo', 'foo  ', '   foo', '  foo  ')
        for raw in padded_values:
            mock_run.return_value = raw
            self.assertEqual(get_fqdn(), 'foo')
    def test_get_fqdn(self, mock_run):
        '''
        Ensure the fqdn comes back stripped of leading and trailing spaces.

        Note: utils.run_as returns utf-8 decoded strings.
        '''
        expected = 'foo'
        candidates = (
            'foo',
            'foo  ',
            '   foo',
            '  foo  ',
        )
        for candidate in candidates:
            # Each candidate is what the mocked command run hands back.
            mock_run.return_value = candidate
            self.assertEqual(get_fqdn(), expected)
Esempio n. 12
0
def send_early_install_info(remote):
    """Give clients/slaves the relation data they need to begin installing.

    Slaves or clients may join before the namenode is installed. Our FQDN
    and ports are enough for them to start their own installation, which
    helps parallelize installation across the cluster.

    Note that slaves can safely install early, but should not start until the
    'namenode.ready' state is set by the dfs-slave interface.
    """
    namenode_host = get_fqdn()
    # Look up the IPC port first, then the webhdfs port.
    nn_ports = [
        get_layer_opts().port(port_name)
        for port_name in ('namenode', 'nn_webapp_http')
    ]

    remote.send_namenodes([namenode_host])
    remote.send_ports(*nn_ports)
Esempio n. 13
0
def send_early_install_info(remote):
    """Send just enough relation data for clients/slaves to start installing.

    When slaves or clients join ahead of the namenode install, the namenode
    FQDN and ports still let them kick off their installation, so the
    cluster installs in parallel.

    Note that slaves can safely install early, but should not start until the
    'namenode.ready' state is set by the dfs-slave interface.
    """
    nn_fqdn = get_fqdn()
    ipc_port = get_layer_opts().port('namenode')
    web_port = get_layer_opts().port('nn_webapp_http')

    namenodes = [nn_fqdn]
    remote.send_namenodes(namenodes)
    remote.send_ports(ipc_port, web_port)
Esempio n. 14
0
def send_early_install_info(remote):
    """Give clients/slaves the relation data they need to begin installing.

    Slaves or clients may join before the resourcemanager is installed. Our
    FQDN and ports are enough for them to start their own installation,
    which helps parallelize installation across the cluster.

    Note that slaves can safely install early, but should not start until the
    'resourcemanager.ready' state is set by the mapred-slave interface.
    """
    resourcemanager_host = get_fqdn()
    rm_ipc_port = get_layer_opts().port('resourcemanager')
    jobhistory_ipc_port = get_layer_opts().port('jobhistory')
    jobhistory_http_port = get_layer_opts().port('jh_webapp_http')

    remote.send_resourcemanagers([resourcemanager_host])
    # NB: send_ports takes the jobhistory http port before the ipc port.
    remote.send_ports(rm_ipc_port, jobhistory_http_port, jobhistory_ipc_port)
Esempio n. 15
0
def install_namenode():
    """Install the namenode via bigtop, bound to all interfaces.

    Renders the bigtop site yaml for the 'namenode' and 'mapred-app' roles
    with port and bind-host overrides, triggers puppet, records this host in
    the KV, adds the required users, and finally sets the
    'apache-bigtop-namenode.installed' state.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    distro = Bigtop()
    ipc_port = get_layer_opts().port('namenode')
    web_port = get_layer_opts().port('nn_webapp_http')

    site_hosts = {'namenode': get_fqdn()}
    site_roles = ['namenode', 'mapred-app']
    # NB: The NN should listen on all interfaces, so bind to 0.0.0.0.
    site_overrides = {
        'hadoop::common_hdfs::hadoop_namenode_port': ipc_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_http_port': web_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
    }
    distro.render_site_yaml(
        hosts=site_hosts,
        roles=site_roles,
        overrides=site_overrides,
    )
    distro.trigger_puppet()

    # Bigtop does not currently consume the /etc/hosts entries kept in the
    # KV, but some interfaces (eg: dfs-slave) require a hosts_map attribute
    # before they treat the NN as ready. Record our NN info in the KV to
    # satisfy that requirement.
    utils.initialize_kv_host()

    # hadoop-mapreduce or spark may not be installed on this machine, so the
    # 'mapred' and 'spark' user/group must be created explicitly; the
    # namenode needs them to access yarn and spark job history files in
    # hdfs. The ubuntu user is also added to the hadoop, mapred, and spark
    # groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Esempio n. 16
0
def install_namenode():
    """Deploy the namenode with bigtop and have it listen on every interface.

    The site yaml is rendered for the 'namenode' and 'mapred-app' roles with
    port and bind-host overrides before puppet is triggered. Afterwards this
    host is recorded in the KV, the required users are added, and the
    'apache-bigtop-namenode.installed' state is set.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    nn_port = get_layer_opts().port('namenode')
    nn_http_port = get_layer_opts().port('nn_webapp_http')
    local_fqdn = get_fqdn()

    # NB: 0.0.0.0 makes the NN listen on all interfaces.
    all_interfaces = '0.0.0.0'
    hdfs_overrides = {
        'hadoop::common_hdfs::hadoop_namenode_port': nn_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': all_interfaces,
        'hadoop::common_hdfs::hadoop_namenode_http_port': nn_http_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': all_interfaces,
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': all_interfaces,
    }
    bigtop.render_site_yaml(
        hosts={'namenode': local_fqdn},
        roles=['namenode', 'mapred-app'],
        overrides=hdfs_overrides,
    )
    bigtop.trigger_puppet()

    # Some interfaces (eg: dfs-slave) look for a hosts_map attribute to
    # decide whether the NN is ready, even though bigtop does not currently
    # use the /etc/hosts entries stored in the KV. Store our NN info in the
    # KV so that requirement is met.
    utils.initialize_kv_host()

    # Since hadoop-mapreduce is not being installed, create the 'mapred'
    # user/group ourselves so the namenode can access yarn job history files
    # in hdfs. Our ubuntu user also joins the hadoop and mapred groups.
    layer_opts = get_layer_opts()
    layer_opts.add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Esempio n. 17
0
def install_namenode():
    """Install the namenode via bigtop and make it listen on all interfaces.

    Renders the bigtop site yaml for the 'namenode' and 'mapred-app' roles,
    triggers puppet, records this host in the KV, rewrites the bind-host
    properties in hdfs-site.xml, adds the required users, and finally sets
    the 'apache-bigtop-namenode.installed' state.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    distro = Bigtop()
    site_hosts = {'namenode': get_fqdn()}
    site_roles = ['namenode', 'mapred-app']
    distro.render_site_yaml(hosts=site_hosts, roles=site_roles)
    distro.trigger_puppet()

    # Bigtop does not currently consume the /etc/hosts entries kept in the
    # KV, but some interfaces (eg: dfs-slave) require a hosts_map attribute
    # before they treat the NN as ready. Record our NN info in the KV to
    # satisfy that requirement.
    utils.initialize_kv_host()

    # Bind every namenode endpoint to all interfaces.
    site_xml = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(site_xml) as props:
        for bind_key in (
                'dfs.namenode.rpc-bind-host',
                'dfs.namenode.servicerpc-bind-host',
                'dfs.namenode.http-bind-host',
                'dfs.namenode.https-bind-host',
        ):
            props[bind_key] = '0.0.0.0'

    # hadoop-mapreduce is not installed here, so the 'mapred' user/group has
    # to be created explicitly; the namenode needs it to access yarn job
    # history files in hdfs. The ubuntu user is also added to the hadoop and
    # mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Esempio n. 18
0
def install_resourcemanager(namenode):
    """Install the resourcemanager once the namenode has sent its FQDN.

    The namenode FQDN is what gates the RM install, so namenodes() is polled
    whenever a namenode relation exists. This lets the install begin as
    early as possible, even if 'namenode.ready' is not set. The namenode's
    hdfs and webhdfs ports are also read from the relation and passed to
    bigtop as overrides.
    """
    # Nothing to do until the namenode has published its FQDN.
    if not namenode.namenodes():
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing resourcemanager')

    # Hosts
    site_hosts = {
        'namenode': namenode.namenodes()[0],
        'resourcemanager': get_fqdn(),
    }

    # Ports
    rm_ipc_port = get_layer_opts().port('resourcemanager')
    rm_http_port = get_layer_opts().port('rm_webapp_http')
    jobhistory_ipc_port = get_layer_opts().port('jobhistory')
    jobhistory_http_port = get_layer_opts().port('jh_webapp_http')
    nn_ipc_port = namenode.port()
    nn_http_port = namenode.webhdfs_port()

    distro = Bigtop()

    yarn_overrides = {
        'hadoop::common_yarn::hadoop_rm_port': rm_ipc_port,
        'hadoop::common_yarn::hadoop_rm_webapp_port': rm_http_port,
        'hadoop::common_yarn::hadoop_rm_bind_host': '0.0.0.0',
    }
    mapred_overrides = {
        'hadoop::common_mapred_app::mapreduce_jobhistory_host': '0.0.0.0',
        'hadoop::common_mapred_app::mapreduce_jobhistory_port': jobhistory_ipc_port,
        'hadoop::common_mapred_app::mapreduce_jobhistory_webapp_port': jobhistory_http_port,
    }
    # NB: When the NN and RM are colocated, the RM runs puppet apply last.
    # Repeat the common_hdfs overrides here so that no hdfs-site.xml data
    # set by the NN is lost.
    hdfs_overrides = {
        'hadoop::common_hdfs::hadoop_namenode_port': nn_ipc_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_http_port': nn_http_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
    }
    # Merge in yarn, mapred, hdfs order to keep the original key ordering.
    site_overrides = {}
    for section in (yarn_overrides, mapred_overrides, hdfs_overrides):
        site_overrides.update(section)

    distro.render_site_yaml(
        hosts=site_hosts,
        roles=['resourcemanager'],
        overrides=site_overrides,
    )
    distro.trigger_puppet()

    # Bigtop does not currently consume the /etc/hosts entries kept in the
    # KV, but some interfaces (eg: mapred-slave) require a hosts_map
    # attribute before they treat the RM as ready. Record our RM info in the
    # KV to satisfy that requirement.
    utils.initialize_kv_host()

    # Spark may not be installed on this machine, so the 'spark' user/group
    # must be created explicitly; the history server needs it to access
    # spark job history files in hdfs. The ubuntu user is also added to the
    # hadoop, mapred, and spark groups on this machine.
    get_layer_opts().add_users()

    set_state('apache-bigtop-resourcemanager.installed')
    hookenv.status_set('maintenance', 'resourcemanager installed')
Esempio n. 19
0
def send_fqdn():
    """Store this unit's FQDN under the 'master-fqdn' leader key and log it."""
    fqdn = get_fqdn()
    leader_settings = {'master-fqdn': fqdn}
    leadership.leader_set(leader_settings)
    message = "Setting leader to {}".format(fqdn)
    hookenv.log(message)