def send_info(datanode):
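    # Share the cluster host map with related DataNodes and mark this NameNode
    # ready; the spec/port/SSH-key exchange and slave registration below are
    # currently commented out.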
    hadoop = get_bigtop_base()
    # hdfs = HDFS(hadoop)
    # local_hostname = hookenv.local_unit().replace('/', '-')
    # hdfs_port = hadoop.dist_config.port('namenode')
    # webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    # datanode.send_spec(hadoop.spec())
    # datanode.send_namenodes([local_hostname])
    # datanode.send_ports(hdfs_port, webhdfs_port)
    # datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    # slaves = [node['host'] for node in datanode.nodes()]
    # if data_changed('namenode.slaves', slaves):
    #     unitdata.kv().set('namenode.slaves', slaves)
    #     hdfs.register_slaves(slaves)

    # hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
    #     count=len(slaves),
    #     s='s' if len(slaves) > 1 else '',
    # ))
    set_state('namenode.ready')
    hookenv.status_set('active', 'ready')
def send_info(nodemanager):
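    # Publish the ResourceManager spec, ports, hostname, and SSH key to related
    # NodeManagers, register them as YARN slaves, and report a ready status.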
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts({node['ip']: node['host'] for node in nodemanager.nodes()})
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in nodemanager.nodes()]
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')
def send_info(datanode):
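    # Publish the NameNode spec, ports, hostname, and SSH key to related
    # DataNodes, register them as HDFS slaves, and report a ready status.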
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace("/", "-")
    hdfs_port = hadoop.dist_config.port("namenode")
    webhdfs_port = hadoop.dist_config.port("nn_webapp_http")

    utils.update_kv_hosts({node["ip"]: node["host"] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key("hdfs"))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node["host"] for node in datanode.nodes()]
    if data_changed("namenode.slaves", slaves):
        unitdata.kv().set("namenode.slaves", slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        "active", "Ready ({count} DataNode{s})".format(count=len(slaves), s="s" if len(slaves) > 1 else "")
    )
    set_state("namenode.ready")
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump({
            'relation_name': resourcemanager.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in resourcemanager.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(
                    resourcemanager.relation_name
                )
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump(
            {
                'relation_name': namenode.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in namenode.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    }
                    for rid in hookenv.relation_ids(namenode.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump(
            {
                'relation_name': resourcemanager.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in resourcemanager.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    }
                    for rid in hookenv.relation_ids(
                        resourcemanager.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(resourcemanager.resourcemanagers()[0],
                             resourcemanager.port(),
                             resourcemanager.hs_http(),
                             resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.refresh_slaves()

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host']
         for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        'active', 'Ready ({count} DataNode{s})'.format(
            count=len(slaves),
            s='s' if len(slaves) > 1 else '',
        ))
    set_state('namenode.ready')
def start_datanode(namenode):
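    # Point this DataNode at the first NameNode, install the NameNode's SSH key,
    # sync /etc/hosts, then start the DataNode service and open its ports.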
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_datanode(namenode.namenodes()[0], namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs.start_datanode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')
def configure_namenode():
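    # Configure the NameNode for the cluster nodes, format and start HDFS,
    # create the base HDFS directories, open the NameNode ports, and seed
    # the local /etc/hosts entries.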
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(get_cluster_nodes())
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')
    def setup_kafka_config(self):
        '''
        Copy the default configuration files to the kafka_conf path
        defined in dist.yaml.
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        # dir. we've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Configure immutable bits
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(
            hookenv.unit_get('private-address'))
        kafka_server_conf = self.dist_config.path(
            'kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(
            kafka_server_conf, {
                r'^broker.id=.*':
                'broker.id=%s' % unit_num,
                r'^port=.*':
                'port=%s' % self.dist_config.port('kafka'),
                r'^log.dirs=.*':
                'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
                r'^#?advertised.host.name=.*':
                'advertised.host.name=%s' % public_address,
            })

        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(
            kafka_log4j, {
                r'^kafka.logs.dir=.*':
                'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
            })

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_kv_host(private_ip, public_address)
        utils.manage_etc_hosts()
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
    def trigger_puppet(self):
        # If we can't reverse resolve the hostname (like on azure), support DN
        # registration by IP address.
        # NB: determine this *before* updating /etc/hosts below since
        # gethostbyaddr will not fail if we have an /etc/hosts entry.
        reverse_dns_bad = False
        try:
            socket.gethostbyaddr(utils.resolve_private_address(hookenv.unit_private_ip()))
        except socket.herror:
            reverse_dns_bad = True
        # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
        # java to do a hostname lookup on clouds that have >64 char fqdns
        # (gce). Force short hostname (< 64 char) into /etc/hosts as workaround.
        # Better fix may be to move to java8. See http://paste.ubuntu.com/16230171/
        # NB: do this before the puppet apply, which may call java stuffs
        # like format namenode, which will fail if we don't get this fix
        # down early.
        short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
        private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        if short_host and private_ip:
            utils.update_kv_host(private_ip, short_host)
            utils.manage_etc_hosts()

        charm_dir = hookenv.charm_dir()
        # TODO JIRA KWM: rm does not need Hdfs_init and will fail
        rm_patch = Path(charm_dir) / 'resources/patch1_rm_init_hdfs.patch'
        # TODO JIRA KWM: nm should not *need* mapred role. we could patch it
        # with nm_patch, or adjust nm charm to include mapred role. for now,
        # we're doing the latter. todo rfc from dev@bigtop list.
        # nm_patch = Path(charm_dir) / 'resources/patch2_nm_core-site.patch'
        # TODO JIRA KWM: client role needs common_yarn for yarn-site.xml
        client_patch = Path(charm_dir) / 'resources/patch3_client_role_use_common_yarn.patch'
        with chdir("{}".format(self.bigtop_base)):
            # rm patch goes first
            utils.run_as('root', 'patch', '-p1', '-s', '-i', rm_patch)
            # skip nm_patch for now since nm charm is including mapred role
            # utils.run_as('root', 'patch', '-p1', '-s', '-i', nm_patch)
            # client patch goes last
            utils.run_as('root', 'patch', '-p1', '-s', '-i', client_patch)
        # TODO FIX ABOVE KWM

        # puppet apply needs to be run where the recipes were unpacked
        with chdir("{}".format(self.bigtop_base)):
            utils.run_as('root', 'puppet', 'apply', '-d',
                         '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                         'bigtop-deploy/puppet/manifests/site.pp')

        # Do any post-puppet config on the generated config files.
        if reverse_dns_bad:
            hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
            with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
                props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'false'
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(resourcemanager.resourcemanagers()[0],
                               resourcemanager.port(),
                               resourcemanager.hs_http(),
                               resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
def configure_ha(cluster, datanode):
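    # When the HA topology changes, register the JournalNodes, restart the
    # NameNode, publish the updated NameNode list to DataNodes, and initialize
    # shared edits on first setup.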
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    cluster_nodes = cluster.nodes()
    jn_nodes = datanode.nodes()
    jn_port = datanode.jn_port()
    if data_changed('namenode.ha', [cluster_nodes, jn_nodes, jn_port]):
        utils.update_kv_hosts(cluster.hosts_map())
        utils.manage_etc_hosts()
        hdfs.register_journalnodes(jn_nodes, jn_port)
        hdfs.restart_namenode()
        datanode.send_namenodes(cluster_nodes)
        if not is_state('namenode.shared-edits.init'):
            hdfs.init_sharededits()
            set_state('namenode.shared-edits.init')
    def configure_hosts_file(self):
        """
        Add the unit's private-address to /etc/hosts to ensure that Java
        can resolve the hostname of the server to its real IP address.
        We derive our hostname from the unit_id, replacing / with -.
        """
        local_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        hostname = hookenv.local_unit().replace('/', '-')
        utils.update_kv_hosts({local_ip: hostname})
        utils.manage_etc_hosts()

        # update the hostname to a more semantically meaningful value
        # (this is required on some providers; the /etc/hosts entry must match
        # the /etc/hostname lest Hadoop get confused about where certain things
        # should be run)
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
    def trigger_puppet(self):
        """
        Trigger Puppet to install the desired components.
        """
        java_version = unitdata.kv().get('java_version', '')
        if java_version.startswith('1.7.') and len(get_fqdn()) > 64:
            # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
            # java to do a hostname lookup on clouds that have >64 char FQDNs
            # (e.g., gce). Attempt to work around this by putting the (hopefully
            # short) hostname into /etc/hosts so that it will (hopefully) be
            # used instead (see http://paste.ubuntu.com/16230171/).
            # NB: do this before the puppet apply, which may call java stuffs
            # like format namenode, which will fail if we don't get this fix
            # down early.
            short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
            private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
            if short_host and private_ip:
                utils.update_kv_host(private_ip, short_host)
                utils.manage_etc_hosts()

        # puppet args are bigtop-version dependent
        if self.bigtop_version == '1.1.0':
            puppet_args = [
                '-d',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests/site.pp'
            ]
        else:
            puppet_args = [
                '-d',
                '--parser=future',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests'
            ]

        # puppet apply runs from the root of the bigtop release source
        with chdir(self.bigtop_base):
            utils.run_as('root', 'puppet', 'apply', *puppet_args)

        # Do any post-puppet config on the generated config files.
        utils.re_edit_in_place('/etc/default/bigtop-utils', {
            r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format(
                java_home()),
        })
    def trigger_puppet(self):
        """
        Trigger Puppet to install the desired components.
        """
        java_version = unitdata.kv().get('java_version', '')
        if java_version.startswith('1.7.') and len(get_fqdn()) > 64:
            # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
            # java to do a hostname lookup on clouds that have >64 char FQDNs
            # (e.g., gce). Attempt to work around this by putting the (hopefully
            # short) hostname into /etc/hosts so that it will (hopefully) be
            # used instead (see http://paste.ubuntu.com/16230171/).
            # NB: do this before the puppet apply, which may call java stuffs
            # like format namenode, which will fail if we don't get this fix
            # down early.
            short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
            private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
            if short_host and private_ip:
                utils.update_kv_host(private_ip, short_host)
                utils.manage_etc_hosts()

        # puppet args are bigtop-version dependent
        if self.bigtop_version == '1.1.0':
            puppet_args = [
                '-d',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests/site.pp'
            ]
        else:
            puppet_args = [
                '-d',
                '--parser=future',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests'
            ]

        # puppet apply runs from the root of the bigtop release source
        with chdir(self.bigtop_base):
            utils.run_as('root', 'puppet', 'apply', *puppet_args)

        # Do any post-puppet config on the generated config files.
        utils.re_edit_in_place('/etc/default/bigtop-utils', {
            r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format(
                java_home()),
        })
    def configure_hosts_file(self):
        """
        Add the unit's private-address to /etc/hosts to ensure that Java
        can resolve the hostname of the server to its real IP address.
        We derive our hostname from the unit_id, replacing / with -.
        """
        local_ip = utils.resolve_private_address(
            hookenv.unit_get('private-address'))
        hostname = hookenv.local_unit().replace('/', '-')
        utils.update_kv_hosts({local_ip: hostname})
        utils.manage_etc_hosts()

        # update the hostname to a more semantically meaningful value
        # (this is required on some providers; the /etc/hosts entry must match
        # the /etc/hostname lest Hadoop get confused about where certain things
        # should be run)
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
def update_config(namenode):
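    # Reconfigure this DataNode when the NameNode's connection details change,
    # restarting the DataNode and JournalNode only if already started, and
    # refresh the installed SSH key when it changes.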
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        if is_state('datanode.started'):  # re-check because for manual call
            hdfs.restart_datanode()
            hdfs.restart_journalnode()

    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())
def unregister_nodemanager(nodemanager):
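    # Drop departing NodeManagers from the stored YARN slaves list and
    # /etc/hosts, re-register the remaining slaves, and clear the ready state
    # when none remain.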
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)

    slaves = unitdata.kv().get('resourcemanager.slaves', [])
    slaves_leaving = nodemanager.nodes()
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('resourcemanager.slaves', slaves_remaining)
    yarn.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('resourcemanager.ready')

    nodemanager.dismiss()
def unregister_nodemanager(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    nodes_leaving = nodemanager.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get('resourcemanager.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('resourcemanager.slaves', slaves_remaining)
    yarn.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('resourcemanager.ready')

    nodemanager.dismiss()
def unregister_datanode(datanode):
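    # Drop departing DataNodes from the stored HDFS slaves list and /etc/hosts,
    # re-register and reload the remaining slaves, and clear the ready state
    # when none remain.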
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)
    hdfs.reload_slaves()

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('namenode.ready')

    datanode.dismiss()
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get("namenode.slaves", [])
    slaves_leaving = [node["host"] for node in nodes_leaving]
    hookenv.log("Slaves leaving: {}".format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set("namenode.slaves", slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set("blocked", "Waiting for relation to DataNodes")
        remove_state("namenode.ready")

    datanode.dismiss()
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()
    def install(self):
        version = hookenv.config()['spark_version']
        spark_path = self.extract_spark_binary('spark-{}'.format(version), version)
        os.symlink(spark_path, self.dist_config.path('spark'))
        unitdata.kv().set('spark.version', version)

        self.dist_config.add_users()
        self.dist_config.add_dirs()
        self.dist_config.add_packages()

        # allow ubuntu user to ssh to itself so spark can ssh to its worker
        # in local/standalone modes
        utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

        utils.initialize_kv_host()
        utils.manage_etc_hosts()
        hostname = hookenv.local_unit().replace('/', '-')
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
        unitdata.kv().set('spark.installed', True)
        unitdata.kv().flush(True)
    def install(self):
        version = hookenv.config()['spark_version']
        spark_path = self.extract_spark_binary('spark-{}'.format(version),
                                               version)
        os.symlink(spark_path, self.dist_config.path('spark'))
        unitdata.kv().set('spark.version', version)

        self.dist_config.add_users()
        self.dist_config.add_dirs()
        self.dist_config.add_packages()

        # allow ubuntu user to ssh to itself so spark can ssh to its worker
        # in local/standalone modes
        utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

        utils.initialize_kv_host()
        utils.manage_etc_hosts()
        hostname = hookenv.local_unit().replace('/', '-')
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
        unitdata.kv().set('spark.installed', True)
        unitdata.kv().flush(True)
    def setup_kafka_config(self):
        '''
        Copy the default configuration files to the kafka_conf path
        defined in dist.yaml.
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        # dir. we've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Similarly, we've seen issues where kafka wants to write to
        # KAFKA_HOME/logs regardless of the LOG_DIR, so make a symlink.
        default_logs = self.dist_config.path('kafka') / 'logs'
        kafka_logs = self.dist_config.path('kafka_app_logs')
        default_logs.rmtree_p()
        kafka_logs.symlink(default_logs)

        # Configure environment
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # Configure server.properties
        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        kafka_port = self.dist_config.port('kafka')
        kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(kafka_server_conf, {
            r'^broker.id=.*': 'broker.id=%s' % unit_num,
            r'^port=.*': 'port=%s' % kafka_port,
            r'^log.dirs=.*': 'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
            r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
        })

        # Configure producer.properties
        # note: we set the broker list host below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka.
        # It must match our advertised.host.name from above.
        kafka_producer_conf = self.dist_config.path('kafka_conf') / 'producer.properties'
        utils.re_edit_in_place(kafka_producer_conf, {
            r'^#?metadata.broker.list=.*': 'metadata.broker.list=%s:%s' % (public_address, kafka_port),
        })

        # Configure log properties
        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(kafka_log4j, {
            r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
        })

        template_name = 'upstart.conf'
        template_path = '/etc/init/kafka.conf'
        if host.init_is_systemd():
            template_name = 'systemd.conf'
            template_path = '/etc/systemd/system/kafka.service'

        templating.render(
            template_name,
            template_path,
            context={
                'kafka_conf': self.dist_config.path('kafka_conf'),
                'kafka_bin': '{}/bin'.format(self.dist_config.path('kafka'))
            },
        )

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_kv_host(private_ip, public_address)
        utils.manage_etc_hosts()
def manage_cluster_hosts(cluster):
    utils.update_kv_hosts(cluster.hosts_map())
    utils.manage_etc_hosts()
def manage_datanode_hosts(datanode):
    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()
    datanode.send_hosts_map(utils.get_kv_hosts())
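Every example above follows the same host-bookkeeping pattern: merge the peer units' IP-to-hostname pairs into the charm's key-value store, then regenerate /etc/hosts from that store. The snippet below is a minimal, self-contained sketch of that pattern only; it is not the charm helpers' actual implementation, and the file paths and variable names are illustrative assumptions.

# Minimal sketch of the update-KV-then-rewrite-hosts pattern used above.
# NOT the real charm utils: KV_FILE and HOSTS_FILE are illustrative stand-ins.
import json
from pathlib import Path

KV_FILE = Path('/tmp/kv_hosts.json')      # assumed stand-in for the unit KV store
HOSTS_FILE = Path('/tmp/etc_hosts_demo')  # assumed stand-in for /etc/hosts


def get_kv_hosts():
    """Return the persisted ip -> hostname map (empty if nothing stored yet)."""
    return json.loads(KV_FILE.read_text()) if KV_FILE.exists() else {}


def update_kv_hosts(ip_host_map):
    """Merge new ip -> hostname pairs into the persisted map."""
    hosts = get_kv_hosts()
    hosts.update(ip_host_map)
    KV_FILE.write_text(json.dumps(hosts))


def remove_kv_hosts(hostnames):
    """Drop any entries whose hostname appears in the given iterable."""
    doomed = set(hostnames)
    hosts = {ip: host for ip, host in get_kv_hosts().items() if host not in doomed}
    KV_FILE.write_text(json.dumps(hosts))


def manage_etc_hosts():
    """Render the hosts file from the persisted map, one 'ip hostname' per line."""
    lines = ['127.0.0.1 localhost']
    lines += ['{} {}'.format(ip, host) for ip, host in sorted(get_kv_hosts().items())]
    HOSTS_FILE.write_text('\n'.join(lines) + '\n')


# Usage mirroring the handlers above: record the peers, then rewrite the hosts file.
update_kv_hosts({'10.0.0.5': 'datanode-0', '10.0.0.6': 'datanode-1'})
manage_etc_hosts()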