def send_info(datanode):
    hadoop = get_bigtop_base()
    # hdfs = HDFS(hadoop)
    # local_hostname = hookenv.local_unit().replace('/', '-')
    # hdfs_port = hadoop.dist_config.port('namenode')
    # webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    # datanode.send_spec(hadoop.spec())
    # datanode.send_namenodes([local_hostname])
    # datanode.send_ports(hdfs_port, webhdfs_port)
    # datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    # slaves = [node['host'] for node in datanode.nodes()]
    # if data_changed('namenode.slaves', slaves):
    #     unitdata.kv().set('namenode.slaves', slaves)
    #     hdfs.register_slaves(slaves)
    # hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
    #     count=len(slaves),
    #     s='s' if len(slaves) > 1 else '',
    # ))
    set_state('namenode.ready')
    hookenv.status_set('active', 'ready')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts({node['ip']: node['host']
                           for node in nodemanager.nodes()})
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    # send the yarn user's key; the nodemanager installs it for 'yarn'
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in nodemanager.nodes()]
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace("/", "-")
    hdfs_port = hadoop.dist_config.port("namenode")
    webhdfs_port = hadoop.dist_config.port("nn_webapp_http")

    utils.update_kv_hosts({node["ip"]: node["host"] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key("hdfs"))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node["host"] for node in datanode.nodes()]
    if data_changed("namenode.slaves", slaves):
        unitdata.kv().set("namenode.slaves", slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        "active",
        "Ready ({count} DataNode{s})".format(
            count=len(slaves), s="s" if len(slaves) > 1 else ""
        ),
    )
    set_state("namenode.ready")
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump({
            'relation_name': resourcemanager.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in resourcemanager.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(
                    resourcemanager.relation_name
                )
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        data = yaml.dump(
            {
                'relation_name': namenode.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in namenode.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    } for rid in hookenv.relation_ids(namenode.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        data = yaml.dump(
            {
                'relation_name': resourcemanager.relation_name,
                'conversations': {
                    conv.key: dict({'relation_ids': conv.relation_ids},
                                   **conv.serialize(conv))
                    for conv in resourcemanager.conversations()
                },
                'relation_data': {
                    rid: {
                        unit: hookenv.relation_get(unit=unit, rid=rid)
                        for unit in hookenv.related_units(rid)
                    } for rid in hookenv.relation_ids(
                        resourcemanager.relation_name)
                },
            },
            default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(resourcemanager.resourcemanagers()[0],
                             resourcemanager.port(),
                             resourcemanager.hs_http(),
                             resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.refresh_slaves()

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host'] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        'active',
        'Ready ({count} DataNode{s})'.format(
            count=len(slaves),
            s='s' if len(slaves) > 1 else '',
        ))
    set_state('namenode.ready')
def start_datanode(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_datanode(namenode.namenodes()[0], namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs.start_datanode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')
def configure_namenode():
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode(get_cluster_nodes())
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')
def setup_kafka_config(self):
    '''
    copy the default configuration files to kafka_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('kafka') / 'config'
    kafka_conf = self.dist_config.path('kafka_conf')
    kafka_conf.rmtree_p()
    default_conf.copytree(kafka_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    # dir. we've seen issues where kafka still looks for config in
    # KAFKA_HOME/config.
    default_conf.rmtree_p()
    kafka_conf.symlink(default_conf)

    # Configure immutable bits
    kafka_bin = self.dist_config.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if kafka_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], kafka_bin])
        env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

    # note: we set the advertised.host.name below to the public_address
    # to ensure that external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_ip = utils.resolve_private_address(
        hookenv.unit_get('private-address'))
    kafka_server_conf = self.dist_config.path(
        'kafka_conf') / 'server.properties'
    service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    utils.re_edit_in_place(
        kafka_server_conf, {
            r'^broker.id=.*': 'broker.id=%s' % unit_num,
            r'^port=.*': 'port=%s' % self.dist_config.port('kafka'),
            r'^log.dirs=.*': 'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
            r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
        })

    kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
    utils.re_edit_in_place(
        kafka_log4j, {
            r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
        })

    # fix for lxc containers and some corner cases in manual provider
    # ensure that public_address is resolvable internally by mapping it
    # to the private IP
    utils.update_kv_host(private_ip, public_address)
    utils.manage_etc_hosts()
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
def trigger_puppet(self):
    # If we can't reverse resolve the hostname (like on azure), support DN
    # registration by IP address.
    # NB: determine this *before* updating /etc/hosts below since
    # gethostbyaddr will not fail if we have an /etc/hosts entry.
    reverse_dns_bad = False
    try:
        socket.gethostbyaddr(utils.resolve_private_address(hookenv.unit_private_ip()))
    except socket.herror:
        reverse_dns_bad = True

    # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
    # java to do a hostname lookup on clouds that have >64 char fqdns
    # (gce). Force short hostname (< 64 char) into /etc/hosts as workaround.
    # Better fix may be to move to java8. See http://paste.ubuntu.com/16230171/
    # NB: do this before the puppet apply, which may call java stuff
    # like format namenode, which will fail if we don't get this fix
    # down early.
    short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
    private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    if short_host and private_ip:
        utils.update_kv_host(private_ip, short_host)
        utils.manage_etc_hosts()

    charm_dir = hookenv.charm_dir()
    # TODO JIRA KWM: rm does not need Hdfs_init and will fail
    rm_patch = Path(charm_dir) / 'resources/patch1_rm_init_hdfs.patch'
    # TODO JIRA KWM: nm should not *need* mapred role. we could patch it
    # with nm_patch, or adjust nm charm to include mapred role. for now,
    # we're doing the latter. todo rfc from dev@bigtop list.
    # nm_patch = Path(charm_dir) / 'resources/patch2_nm_core-site.patch'
    # TODO JIRA KWM: client role needs common_yarn for yarn-site.xml
    client_patch = Path(charm_dir) / 'resources/patch3_client_role_use_common_yarn.patch'
    with chdir("{}".format(self.bigtop_base)):
        # rm patch goes first
        utils.run_as('root', 'patch', '-p1', '-s', '-i', rm_patch)
        # skip nm_patch for now since nm charm is including mapred role
        # utils.run_as('root', 'patch', '-p1', '-s', '-i', nm_patch)
        # client patch goes last
        utils.run_as('root', 'patch', '-p1', '-s', '-i', client_patch)
    # TODO FIX ABOVE KWM

    # puppet apply needs to be run where the recipes were unpacked
    with chdir("{}".format(self.bigtop_base)):
        utils.run_as('root', 'puppet', 'apply', '-d',
                     '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                     'bigtop-deploy/puppet/manifests/site.pp')

    # Do any post-puppet config on the generated config files.
    if reverse_dns_bad:
        hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
        with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
            props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'false'
def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(resourcemanager.resourcemanagers()[0],
                               resourcemanager.port(),
                               resourcemanager.hs_http(),
                               resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')
def configure_ha(cluster, datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    cluster_nodes = cluster.nodes()
    jn_nodes = datanode.nodes()
    jn_port = datanode.jn_port()
    if data_changed('namenode.ha', [cluster_nodes, jn_nodes, jn_port]):
        utils.update_kv_hosts(cluster.hosts_map())
        utils.manage_etc_hosts()
        hdfs.register_journalnodes(jn_nodes, jn_port)
        hdfs.restart_namenode()
        datanode.send_namenodes(cluster_nodes)
        if not is_state('namenode.shared-edits.init'):
            hdfs.init_sharededits()
            set_state('namenode.shared-edits.init')
def configure_hosts_file(self): """ Add the unit's private-address to /etc/hosts to ensure that Java can resolve the hostname of the server to its real IP address. We derive our hostname from the unit_id, replacing / with -. """ local_ip = utils.resolve_private_address(hookenv.unit_get('private-address')) hostname = hookenv.local_unit().replace('/', '-') utils.update_kv_hosts({local_ip: hostname}) utils.manage_etc_hosts() # update name of host to more semantically meaningful value # (this is required on some providers; the /etc/hosts entry must match # the /etc/hostname lest Hadoop get confused about where certain things # should be run) etc_hostname = Path('/etc/hostname') etc_hostname.write_text(hostname) check_call(['hostname', '-F', etc_hostname])
def trigger_puppet(self): """ Trigger Puppet to install the desired components. """ java_version = unitdata.kv().get('java_version', '') if java_version.startswith('1.7.') and len(get_fqdn()) > 64: # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on # java to do a hostname lookup on clouds that have >64 char FQDNs # (e.g., gce). Attempt to work around this by putting the (hopefully # short) hostname into /etc/hosts so that it will (hopefully) be # used instead (see http://paste.ubuntu.com/16230171/). # NB: do this before the puppet apply, which may call java stuffs # like format namenode, which will fail if we dont get this fix # down early. short_host = subprocess.check_output(['facter', 'hostname']).strip().decode() private_ip = utils.resolve_private_address(hookenv.unit_private_ip()) if short_host and private_ip: utils.update_kv_host(private_ip, short_host) utils.manage_etc_hosts() # puppet args are bigtop-version depedent if self.bigtop_version == '1.1.0': puppet_args = [ '-d', '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"', 'bigtop-deploy/puppet/manifests/site.pp' ] else: puppet_args = [ '-d', '--parser=future', '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"', 'bigtop-deploy/puppet/manifests' ] # puppet apply runs from the root of the bigtop release source with chdir(self.bigtop_base): utils.run_as('root', 'puppet', 'apply', *puppet_args) # Do any post-puppet config on the generated config files. utils.re_edit_in_place('/etc/default/bigtop-utils', { r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format( java_home()), })
def configure_hosts_file(self): """ Add the unit's private-address to /etc/hosts to ensure that Java can resolve the hostname of the server to its real IP address. We derive our hostname from the unit_id, replacing / with -. """ local_ip = utils.resolve_private_address( hookenv.unit_get('private-address')) hostname = hookenv.local_unit().replace('/', '-') utils.update_kv_hosts({local_ip: hostname}) utils.manage_etc_hosts() # update name of host to more semantically meaningful value # (this is required on some providers; the /etc/hosts entry must match # the /etc/hostname lest Hadoop get confused about where certain things # should be run) etc_hostname = Path('/etc/hostname') etc_hostname.write_text(hostname) check_call(['hostname', '-F', etc_hostname])
def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(),
        namenode.namenodes(),
        namenode.port(),
        namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        if is_state('datanode.started'):  # re-check because for manual call
            hdfs.restart_datanode()
            hdfs.restart_journalnode()

    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())
def unregister_nodemanager(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)

    slaves = unitdata.kv().get('resourcemanager.slaves', [])
    slaves_leaving = nodemanager.nodes()
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('resourcemanager.slaves', slaves_remaining)
    yarn.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('resourcemanager.ready')

    nodemanager.dismiss()
def unregister_nodemanager(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    nodes_leaving = nodemanager.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get('resourcemanager.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('resourcemanager.slaves', slaves_remaining)
    yarn.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('resourcemanager.ready')

    nodemanager.dismiss()
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = datanode.nodes()  # only returns nodes in "leaving" state
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)
    hdfs.reload_slaves()

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        remove_state('namenode.ready')

    datanode.dismiss()
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get("namenode.slaves", [])
    slaves_leaving = [node["host"] for node in nodes_leaving]
    hookenv.log("Slaves leaving: {}".format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set("namenode.slaves", slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set("blocked", "Waiting for relation to DataNodes")
        remove_state("namenode.ready")

    datanode.dismiss()
def unregister_datanode(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    nodes_leaving = datanode.nodes()  # only returns nodes in "leaving" state

    slaves = unitdata.kv().get('namenode.slaves', [])
    slaves_leaving = [node['host'] for node in nodes_leaving]
    hookenv.log('Slaves leaving: {}'.format(slaves_leaving))

    slaves_remaining = list(set(slaves) - set(slaves_leaving))
    unitdata.kv().set('namenode.slaves', slaves_remaining)
    hdfs.register_slaves(slaves_remaining)

    utils.remove_kv_hosts(slaves_leaving)
    utils.manage_etc_hosts()

    if not slaves_remaining:
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()
def install(self):
    version = hookenv.config()['spark_version']
    spark_path = self.extract_spark_binary('spark-{}'.format(version), version)
    os.symlink(spark_path, self.dist_config.path('spark'))
    unitdata.kv().set('spark.version', version)

    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()

    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    hostname = hookenv.local_unit().replace('/', '-')
    etc_hostname = Path('/etc/hostname')
    etc_hostname.write_text(hostname)
    check_call(['hostname', '-F', etc_hostname])

    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)
def setup_kafka_config(self):
    '''
    copy the default configuration files to kafka_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('kafka') / 'config'
    kafka_conf = self.dist_config.path('kafka_conf')
    kafka_conf.rmtree_p()
    default_conf.copytree(kafka_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    # dir. we've seen issues where kafka still looks for config in
    # KAFKA_HOME/config.
    default_conf.rmtree_p()
    kafka_conf.symlink(default_conf)

    # Similarly, we've seen issues where kafka wants to write to
    # KAFKA_HOME/logs regardless of the LOG_DIR, so make a symlink.
    default_logs = self.dist_config.path('kafka') / 'logs'
    kafka_logs = self.dist_config.path('kafka_app_logs')
    default_logs.rmtree_p()
    kafka_logs.symlink(default_logs)

    # Configure environment
    kafka_bin = self.dist_config.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if kafka_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], kafka_bin])
        env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

    # Configure server.properties
    # note: we set the advertised.host.name below to the public_address
    # to ensure that external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
    kafka_port = self.dist_config.port('kafka')
    kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
    service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    utils.re_edit_in_place(kafka_server_conf, {
        r'^broker.id=.*': 'broker.id=%s' % unit_num,
        r'^port=.*': 'port=%s' % kafka_port,
        r'^log.dirs=.*': 'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
        r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
    })

    # Configure producer.properties
    # note: we set the broker list host below to the public_address
    # to ensure that external (non-Juju) clients can connect to Kafka.
    # It must match our advertised.host.name from above.
    kafka_producer_conf = self.dist_config.path('kafka_conf') / 'producer.properties'
    utils.re_edit_in_place(kafka_producer_conf, {
        r'^#?metadata.broker.list=.*': 'metadata.broker.list=%s:%s' % (public_address, kafka_port),
    })

    # Configure log properties
    kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
    utils.re_edit_in_place(kafka_log4j, {
        r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
    })

    template_name = 'upstart.conf'
    template_path = '/etc/init/kafka.conf'
    if host.init_is_systemd():
        template_name = 'systemd.conf'
        template_path = '/etc/systemd/system/kafka.service'
    templating.render(
        template_name,
        template_path,
        context={
            'kafka_conf': self.dist_config.path('kafka_conf'),
            'kafka_bin': '{}/bin'.format(self.dist_config.path('kafka'))
        },
    )

    # fix for lxc containers and some corner cases in manual provider
    # ensure that public_address is resolvable internally by mapping it
    # to the private IP
    utils.update_kv_host(private_ip, public_address)
    utils.manage_etc_hosts()
def manage_cluster_hosts(cluster):
    utils.update_kv_hosts(cluster.hosts_map())
    utils.manage_etc_hosts()
def manage_datanode_hosts(datanode):
    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()
    datanode.send_hosts_map(utils.get_kv_hosts())