def update_peers(self, node_list):
    """
    Record which unit is the Spark master and report whether that changed.

    When ``node_list`` is empty this unit acts as master. Otherwise the
    list is sorted in place and the address of its first entry is used.

    :param list node_list: Peer entries; index 1 of each entry is read as
        the peer address. May be empty.
    :returns: True if the stored master IP changed, or if the
        'zookeepers.available' flag in unit data is truthy; else False.
    """
    previous_master = unitdata.kv().get('spark_master.ip', 'not_set')

    if node_list:
        # The master is the smallest entry. Any ordering would do here,
        # so plain (lexicographical) sorting is used.
        node_list.sort()
        master_ip = utils.resolve_private_address(node_list[0][1])
        unitdata.kv().set('spark_master.ip', master_ip)
        unitdata.kv().set('spark_all_master.ips', node_list)
        hookenv.log("Updating master ip to {}.".format(master_ip))
    else:
        hookenv.log("No peers yet. Acting as master.")
        master_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        solo = [(hookenv.local_unit(), master_ip)]
        unitdata.kv().set('spark_all_master.ips', solo)
        unitdata.kv().set('spark_master.ip', master_ip)

    unitdata.kv().set('spark_master.is_set', True)
    unitdata.kv().flush(True)

    # In an HA setup, adding peers must be treated as a potential master
    # change, hence the extra check on the zookeeper flag.
    return bool(previous_master != master_ip
                or unitdata.kv().get('zookeepers.available', False))
def update_peers(self, node_list):
    """
    Store the Spark master IP and the list of master candidates.

    :param list node_list: Peer entries whose second element is an address.
        An empty list means this unit is the only candidate. A non-empty
        list is sorted in place.
    :returns: True if the master peer was updated (or zookeepers are marked
        available in unit data); False otherwise.
    """
    kv = unitdata.kv
    old_ip = kv().get('spark_master.ip', 'not_set')

    if not node_list:
        hookenv.log("No peers yet. Acting as master.")
        new_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        candidates = [(hookenv.local_unit(), new_ip)]
        kv().set('spark_all_master.ips', candidates)
        kv().set('spark_master.ip', new_ip)
    else:
        # Elect the minimum entry; the exact ordering is unimportant, so
        # the default lexicographical sort is sufficient.
        node_list.sort()
        new_ip = utils.resolve_private_address(node_list[0][1])
        kv().set('spark_master.ip', new_ip)
        kv().set('spark_all_master.ips', node_list)
        hookenv.log("Updating master ip to {}.".format(new_ip))

    kv().set('spark_master.is_set', True)
    kv().flush(True)

    if old_ip != new_ip:
        return True
    # With HA, a peer change is a potential master change even when the
    # computed master IP is the same.
    return bool(kv().get('zookeepers.available', False))
def hosts_map(self):
    """
    Build an IP -> hostname mapping for this unit and each conversation.

    Hostnames are unit names with '/' replaced by '-'. For a conversation,
    the first entry of ``conv.units`` supplies the unit name.

    :returns: dict keyed by resolved private address.
    """
    own_name = hookenv.local_unit().replace('/', '-')
    own_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    mapping = {own_ip: own_name}

    for conversation in self.conversations():
        remote_addr = conversation.get_remote('private-address', '')
        remote_ip = utils.resolve_private_address(remote_addr)
        mapping[remote_ip] = list(conversation.units)[0].replace('/', '-')

    return mapping
def trigger_puppet(self):
    """
    Patch the unpacked Bigtop recipes and run ``puppet apply`` on them.

    In order: probe reverse DNS for this unit's private address, register
    the short hostname for /etc/hosts, apply the resourcemanager and client
    patches, run puppet from ``self.bigtop_base``, and finally disable the
    HDFS ip-hostname check if the reverse-DNS probe failed.
    """
    # If the address cannot be reverse resolved (like on azure), datanode
    # registration by IP address has to be supported (see the end of this
    # method).
    # NB: probe *before* /etc/hosts is updated below, since gethostbyaddr
    # will not fail once an /etc/hosts entry exists.
    try:
        socket.gethostbyaddr(
            utils.resolve_private_address(hookenv.unit_private_ip()))
    except socket.herror:
        reverse_dns_bad = True
    else:
        reverse_dns_bad = False

    # java7 has a MAXHOSTNAMELEN of 64 chars, so java cannot be relied on
    # for hostname lookups on clouds with >64 char fqdns (gce). Force the
    # short hostname into /etc/hosts as a workaround; moving to java8 may
    # be the better fix. See http://paste.ubuntu.com/16230171/
    # NB: this must precede the puppet apply, which may call java (e.g. to
    # format the namenode) and would fail without this fix in place.
    facter_output = subprocess.check_output(['facter', 'hostname'])
    short_host = facter_output.strip().decode()
    private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    if short_host and private_ip:
        utils.update_kv_host(private_ip, short_host)
        utils.manage_etc_hosts()

    resources = Path(hookenv.charm_dir())
    # TODO JIRA KWM: rm does not need Hdfs_init and will fail
    rm_patch = resources / 'resources/patch1_rm_init_hdfs.patch'
    # TODO JIRA KWM: nm should not *need* the mapred role. It could be
    # patched (resources/patch2_nm_core-site.patch) or the nm charm could
    # include the mapred role; for now the latter is done, so no nm patch
    # is applied here. todo rfc from dev@bigtop list.
    # TODO JIRA KWM: client role needs common_yarn for yarn-site.xml
    client_patch = (
        resources / 'resources/patch3_client_role_use_common_yarn.patch')

    with chdir("{}".format(self.bigtop_base)):
        # Order matters: the rm patch goes first, the client patch last.
        for patch_file in (rm_patch, client_patch):
            utils.run_as('root', 'patch', '-p1', '-s', '-i', patch_file)
    # TODO FIX ABOVE KWM

    # puppet apply needs to be run where the recipes were unpacked
    with chdir("{}".format(self.bigtop_base)):
        utils.run_as(
            'root', 'puppet', 'apply', '-d',
            '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
            'bigtop-deploy/puppet/manifests/site.pp')

    # Post-puppet adjustments to the generated config files.
    if reverse_dns_bad:
        hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
        with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
            props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'false'
def configure_kafka(self, zk_units):
    """
    Point Kafka's consumer and server properties at the given zookeepers.

    :param zk_units: Iterable of (address, port) pairs. When empty or
        falsy, Kafka is stopped and no files are edited.
    """
    if not zk_units:
        # Without zookeepers, make sure kafka is stopped.
        self.stop()
        return

    # Build the sorted ip:port connection string from the zookeepers.
    zk_connect = ",".join(sorted(
        "%s:%s" % (utils.resolve_private_address(address), port)
        for address, port in zk_units))

    # The same zookeeper.connect line is rewritten in both property files.
    for props_name in ('consumer.properties', 'server.properties'):
        cfg = self.dist_config.path('kafka_conf') / props_name
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })
def hosts_map(self):
    """
    Map each conversation's resolved private address to a hostname.

    The hostname is the conversation scope with '/' replaced by '-'.

    :returns: dict of {ip: hostname}; empty when there are no conversations.
    """
    return {
        utils.resolve_private_address(
            conv.get_remote('private-address', '')):
        conv.scope.replace('/', '-')
        for conv in self.conversations()
    }
def configure_kafka(self, zk_units, network_interface=None):
    """
    Render the Bigtop site.yaml for a Kafka broker, apply it and restart.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read.
    :param str network_interface: Optional interface name. When truthy, its
        IP is set as the broker bind address override.
    """
    # Sorted ip:port data from the connected zookeepers.
    endpoints = sorted(
        "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
        for zk in zk_units)
    zk_connect = ",".join(endpoints)

    # The part of the unit name after '/' is used as the broker id.
    _, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    override = {
        'kafka::server::broker_id': unit_num,
        'kafka::server::port': self.dist_config.port('kafka'),
        'kafka::server::zookeeper_connection_string': zk_connect,
    }
    if network_interface:
        override['kafka::server::bind_addr'] = (
            Bigtop().get_ip_for_interface(network_interface))

    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['kafka-server'], overrides=override)
    bigtop.trigger_puppet()
    self.set_advertise()
    self.restart()
def configure_kafka(self, zk_units, network_interface=None):
    """
    Write the zookeeper connection string into Kafka's property files.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read.
    :param str network_interface: Optional interface name. When truthy,
        ``host.name`` in server.properties is set to that interface's IP.
    """
    # Sorted ip:port data from the connected zookeepers.
    zk_connect = ",".join(sorted(
        "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
        for zk in zk_units))

    # Consumer props first, then server props.
    for props_name in ('consumer.properties', 'server.properties'):
        cfg = self.dist_config.path('kafka_conf') / props_name
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

    # Possibly bind a network interface. ``cfg`` still refers to
    # server.properties at this point.
    if network_interface:
        bind_ip = get_ip_for_interface(network_interface)
        utils.re_edit_in_place(cfg, {
            r'^#?host.name=.*': 'host.name={}'.format(bind_ip),
        })
def configure_kafka(self, zk_units, network_interface=None):
    """
    Update Kafka's consumer and server properties for the given zookeepers.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read.
    :param str network_interface: Optional interface whose IP is written as
        ``host.name`` in server.properties; skipped when falsy.
    """
    zk_pattern = r'^zookeeper.connect=.*'

    # Collect ip:port data from the connected zookeepers, then sort.
    endpoints = [
        "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
        for zk in zk_units
    ]
    endpoints.sort()
    zk_connect = ",".join(endpoints)

    consumer_props = self.dist_config.path('kafka_conf') / 'consumer.properties'
    utils.re_edit_in_place(
        consumer_props, {zk_pattern: 'zookeeper.connect=%s' % zk_connect})

    server_props = self.dist_config.path('kafka_conf') / 'server.properties'
    utils.re_edit_in_place(
        server_props, {zk_pattern: 'zookeeper.connect=%s' % zk_connect})

    if not network_interface:
        return
    # Bind the broker to the requested interface's address.
    utils.re_edit_in_place(server_props, {
        r'^#?host.name=.*':
            'host.name={}'.format(get_ip_for_interface(network_interface)),
    })
def am_i_registered(self):
    """
    Check whether a ready remote unit lists this unit in its 'etc_hosts'.

    The remote 'etc_hosts' value is parsed as JSON. When no data is
    available it is treated as an empty mapping.

    :returns: True if the entry for this unit's resolved private address
        equals this unit's hostname (unit name with '/' -> '-').
    """
    own_ip = utils.resolve_private_address(
        hookenv.unit_get('private-address'))
    own_hostname = hookenv.local_unit().replace('/', '-')

    _, remote_data = any_ready_unit(self.relation_name)
    raw_hosts = (remote_data or {}).get('etc_hosts', '{}')
    known_hosts = json.loads(raw_hosts)
    return known_hosts.get(own_ip) == own_hostname
def client_present(client):
    """
    Send Spark master details to a client, but only from the leader unit.

    :param client: Relation object; ``set_spark_started`` and
        ``send_master_info`` are called on it.
    """
    if not is_state('leadership.is_leader'):
        return

    client.set_spark_started()
    own_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    client.send_master_info(Spark().get_master_url(own_ip), own_ip)
def get_zk_connect(self, zk_units):
    """
    Return the sorted, comma-separated IPs of the given zookeeper units.

    :param list zk_units: Zookeeper dicts; only 'host' is read.
    :returns: str such as "ip1,ip2"; empty string for no units.
    """
    return ",".join(sorted(
        utils.resolve_private_address(zk['host']) for zk in zk_units))
def configure_kafka(self, zk_units, network_interface=None):
    """
    Configure the Kafka broker through Bigtop/puppet, then restart it.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read.
    :param str network_interface: Optional interface to bind to. Ignored
        when falsy.
    """
    # ip:port data from the connected zookeepers
    endpoints = []
    for zk in zk_units:
        zk_ip = utils.resolve_private_address(zk['host'])
        endpoints.append("{}:{}".format(zk_ip, zk['port']))
    zk_connect = ",".join(sorted(endpoints))

    unit_name = os.environ['JUJU_UNIT_NAME']
    _, broker_id = unit_name.split('/', 1)
    port = self.dist_config.port('kafka')

    site_overrides = {
        'kafka::server::broker_id': broker_id,
        'kafka::server::port': port,
        'kafka::server::zookeeper_connection_string': zk_connect,
    }
    if network_interface:
        bind_ip = Bigtop().get_ip_for_interface(network_interface)
        site_overrides['kafka::server::bind_addr'] = bind_ip

    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['kafka-server'], overrides=site_overrides)
    bigtop.trigger_puppet()

    self.set_advertise()
    self.restart()
def get_zk_connect(self, zk_units):
    """
    Build a zookeeper connection string from unit host addresses.

    :param list zk_units: Zookeeper dicts; the 'host' value of each is
        resolved to an IP.
    :returns: The resolved IPs, sorted and joined with commas.
    """
    addresses = [
        utils.resolve_private_address(zk['host']) for zk in zk_units
    ]
    addresses.sort()
    return ",".join(addresses)
def configure_kafka(self):
    """
    Point Kafka at the connected zookeepers, or stop it if there are none.

    Kafka is stopped unless the Zookeeper relation reports connected units
    and is ready. Otherwise the sorted ip:port list is written to the
    consumer and server property files.
    """
    if not (Zookeeper().connected_units() and Zookeeper().is_ready()):
        # No usable zookeepers: make sure kafka is stopped.
        self.stop()
        return

    endpoints = []
    for _unit, data in Zookeeper().filtered_data().items():
        zk_ip = utils.resolve_private_address(data['private-address'])
        endpoints.append("%s:%s" % (zk_ip, data['port']))
    zk_connect = ",".join(sorted(endpoints))

    # Consumer props are updated first, then server props.
    for props_name in ('consumer.properties', 'server.properties'):
        cfg = self.dist_config.path('kafka_conf') / props_name
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })
def nodes(self):
    """
    List the remote units as host/ip dicts, one per conversation.

    :returns: list of {'host': scope with '/' -> '-', 'ip': resolved
        private address}.
    """
    found = []
    for conv in self.conversations():
        host_name = conv.scope.replace('/', '-')
        address = conv.get_remote('private-address', '')
        found.append({
            'host': host_name,
            'ip': utils.resolve_private_address(address),
        })
    return found
def nodes(self):
    """
    Describe every conversation as a dict with 'host' and 'ip' keys.

    'host' is the conversation scope with '/' replaced by '-'. 'ip' is the
    remote 'private-address' after resolution.
    """
    def describe(conversation):
        # One entry per remote conversation.
        return {
            'host': conversation.scope.replace('/', '-'),
            'ip': utils.resolve_private_address(
                conversation.get_remote('private-address', '')),
        }

    return [describe(conv) for conv in self.conversations()]
def configure_resourcemanager():
    """
    Configure the YARN resourcemanager and job history server on this unit.

    Afterwards this unit's ip -> hostname pair is recorded via
    ``utils.update_kv_hosts`` and the 'resourcemanager.configured' state
    is set.
    """
    hostname = hookenv.local_unit().replace('/', '-')
    ip_addr = utils.resolve_private_address(
        hookenv.unit_get('private-address'))

    yarn = YARN(get_hadoop_base())
    yarn.configure_resourcemanager()
    yarn.configure_jobhistory()

    utils.update_kv_hosts({ip_addr: hostname})
    set_state('resourcemanager.configured')
def configure_resourcemanager():
    """
    Set up the resourcemanager and job history, then register this host.

    The host entry maps the resolved private address to the unit name with
    '/' replaced by '-'.
    """
    unit_hostname = hookenv.local_unit().replace('/', '-')
    unit_address = hookenv.unit_get('private-address')
    host_entry = {utils.resolve_private_address(unit_address): unit_hostname}

    hadoop_base = get_hadoop_base()
    yarn_service = YARN(hadoop_base)
    for step in (yarn_service.configure_resourcemanager,
                 yarn_service.configure_jobhistory):
        step()

    utils.update_kv_hosts(host_entry)
    set_state('resourcemanager.configured')
def get_master(self):
    """
    Translate the 'spark_execution_mode' config value into a master string.

    :returns: the mode itself for 'local*' and 'yarn-cluster'; a
        'spark://<ip>:7077' URL for 'standalone'; 'yarn-client' for any
        other 'yarn*' mode; None for anything else.
    """
    mode = hookenv.config()['spark_execution_mode']

    if mode.startswith('local') or mode == 'yarn-cluster':
        return mode
    if mode == 'standalone':
        own_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        return 'spark://{}:7077'.format(own_ip)
    if mode.startswith('yarn'):
        return 'yarn-client'
    return None
def get_master(self):
    """
    Return the Spark master setting for the configured execution mode.

    'local*' and 'yarn-cluster' modes are returned unchanged, 'standalone'
    yields a spark:// URL on this unit's resolved private address (port
    7077), remaining 'yarn*' modes yield 'yarn-client', and any other mode
    yields None.
    """
    exec_mode = hookenv.config()['spark_execution_mode']
    passthrough = exec_mode.startswith('local') or exec_mode == 'yarn-cluster'

    master = None
    if passthrough:
        master = exec_mode
    elif exec_mode == 'standalone':
        master = 'spark://{}:7077'.format(
            utils.resolve_private_address(hookenv.unit_private_ip()))
    elif exec_mode.startswith('yarn'):
        master = 'yarn-client'
    return master
def setup_kafka_config(self):
    """
    Copy the default configuration files to the kafka_conf path defined in
    dist.yaml, then apply this unit's fixed settings.

    Covers /etc/environment (PATH, LOG_DIR), server.properties (broker id,
    port, log dirs, advertised host), log4j.properties (log dir) and an
    /etc/hosts mapping of the public address to the private IP.
    """
    shipped_conf = self.dist_config.path('kafka') / 'config'
    conf_dir = self.dist_config.path('kafka_conf')
    conf_dir.rmtree_p()
    shipped_conf.copytree(conf_dir)
    # Replace the conf shipped in the tarball with a symlink to the real
    # conf dir; kafka has been seen to still look in KAFKA_HOME/config.
    shipped_conf.rmtree_p()
    conf_dir.symlink(shipped_conf)

    # Configure immutable bits
    bin_dir = self.dist_config.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if bin_dir not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], bin_dir])
        env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

    # note: advertised.host.name below is set to the public address so
    # that external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_ip = utils.resolve_private_address(
        hookenv.unit_get('private-address'))

    server_props = self.dist_config.path('kafka_conf') / 'server.properties'
    _, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    server_edits = {
        r'^broker.id=.*': 'broker.id=%s' % unit_num,
        r'^port=.*': 'port=%s' % self.dist_config.port('kafka'),
        r'^log.dirs=.*':
            'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
        r'^#?advertised.host.name=.*':
            'advertised.host.name=%s' % public_address,
    }
    utils.re_edit_in_place(server_props, server_edits)

    log4j_props = self.dist_config.path('kafka_conf') / 'log4j.properties'
    log4j_edits = {
        r'^kafka.logs.dir=.*':
            'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
    }
    utils.re_edit_in_place(log4j_props, log4j_edits)

    # fix for lxc containers and some corner cases in manual provider:
    # make public_address resolvable internally by mapping it to the
    # private IP
    utils.update_kv_host(private_ip, public_address)
    utils.manage_etc_hosts()
def restart_services(spark):
    """
    Restart the datagrid services when the Spark master URL has changed.

    Nothing happens when ``data_changed`` reports no change for the
    connection string.

    :param spark: Object providing ``get_master_info()`` (a dict with
        'connection_string' and 'master') and ``is_scaled()``.
    """
    info = spark.get_master_info()
    master_url = info['connection_string']
    if not data_changed('insightedge.master_url', master_url):
        return

    own_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    is_master = info['master'] == own_ip

    stop_datagrid_services()
    # The third argument is False only on a master unit of a scaled
    # deployment.
    start_datagrid_services(
        master_url, is_master, not (is_master and spark.is_scaled()))

    set_state('insightedge.ready')
    hookenv.status_set('active', 'ready')
def setup_kafka_config(self):
    """
    Copy the default configuration files to the kafka_conf path defined in
    dist.yaml and configure the unit-specific, immutable settings.

    Also maps the public address to the private IP in /etc/hosts and
    renders the upstart job to /etc/init/kafka.conf.
    """
    dist = self.dist_config

    tarball_conf = dist.path('kafka') / 'config'
    real_conf = dist.path('kafka_conf')
    real_conf.rmtree_p()
    tarball_conf.copytree(real_conf)
    # Drop the conf that came with the tarball and symlink the real conf
    # dir in its place, since kafka may still look in KAFKA_HOME/config.
    tarball_conf.rmtree_p()
    real_conf.symlink(tarball_conf)

    # Configure immutable bits
    kafka_bin = dist.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if kafka_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], kafka_bin])
        env['LOG_DIR'] = dist.path('kafka_app_logs')

    # note: advertised.host.name is set to the public address to ensure
    # that external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_address = hookenv.unit_get('private-address')
    private_ip = utils.resolve_private_address(private_address)

    server_props = dist.path('kafka_conf') / 'server.properties'
    _, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    utils.re_edit_in_place(server_props, {
        r'^broker.id=.*': 'broker.id=%s' % unit_num,
        r'^port=.*': 'port=%s' % dist.port('kafka'),
        r'^log.dirs=.*': 'log.dirs=%s' % dist.path('kafka_data_logs'),
        r'^#?advertised.host.name=.*':
            'advertised.host.name=%s' % public_address,
    })

    log4j_props = dist.path('kafka_conf') / 'log4j.properties'
    utils.re_edit_in_place(log4j_props, {
        r'^kafka.logs.dir=.*':
            'kafka.logs.dir=%s' % dist.path('kafka_app_logs'),
    })

    # fix for lxc containers and some corner cases in manual provider:
    # ensure public_address resolves internally to the private IP
    utils.update_etc_hosts({private_ip: public_address})

    upstart_context = {
        'kafka_conf': dist.path('kafka_conf'),
        'kafka_bin': '{}/bin'.format(dist.path('kafka')),
    }
    templating.render(
        'upstart.conf',
        '/etc/init/kafka.conf',
        context=upstart_context,
    )
def configure_namenode():
    """
    Configure, format and start the HDFS namenode on this unit.

    After the namenode is running, the HDFS directories are created, the
    'namenode' ports are opened, this unit's ip -> hostname pair is
    recorded and the 'namenode.started' state is set.
    """
    hostname = hookenv.local_unit().replace('/', '-')
    ip_addr = utils.resolve_private_address(
        hookenv.unit_get('private-address'))

    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    # Order matters: configure -> format -> start -> create dirs.
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')

    utils.update_kv_hosts({ip_addr: hostname})
    set_state('namenode.started')
def configure_namenode():
    """
    Bring up the namenode and register this host.

    Runs the HDFS namenode steps (configure, format, start, create dirs),
    opens the 'namenode' ports, stores the resolved private address ->
    hostname mapping and sets 'namenode.started'.
    """
    unit_hostname = hookenv.local_unit().replace("/", "-")
    unit_address = hookenv.unit_get("private-address")
    host_entry = {utils.resolve_private_address(unit_address): unit_hostname}

    hadoop_base = get_hadoop_base()
    namenode = HDFS(hadoop_base)
    for step in (namenode.configure_namenode,
                 namenode.format_namenode,
                 namenode.start_namenode,
                 namenode.create_hdfs_dirs):
        step()
    hadoop_base.open_ports("namenode")

    utils.update_kv_hosts(host_entry)
    set_state("namenode.started")
def configure_hosts_file(self):
    """
    Add the unit's private-address to /etc/hosts so that Java can resolve
    the server's hostname to its real IP address, and rename the host.

    The hostname is derived from the unit id by replacing '/' with '-'.
    """
    hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    utils.update_etc_hosts(
        {utils.resolve_private_address(private_address): hostname})

    # Give the host a more meaningful name. Some providers require this:
    # the /etc/hosts entry must match /etc/hostname, otherwise Hadoop gets
    # confused about where certain things should run.
    hostname_file = Path('/etc/hostname')
    hostname_file.write_text(hostname)
    check_call(['hostname', '-F', hostname_file])
def configure(self, hosts, zk_units):
    """
    Render the Bigtop site.yaml for HBase and trigger puppet.

    :param hosts: Passed through unchanged to ``render_site_yaml``.
    :param list zk_units: Zookeeper dicts; 'host' is resolved to an IP and
        the sorted IPs form the zookeeper quorum.
    """
    quorum = ",".join(sorted(
        utils.resolve_private_address(zk["host"]) for zk in zk_units))

    hbase_roles = ["hbase-server", "hbase-master", "hbase-client"]
    site_overrides = {
        "hadoop_hbase::common_config::zookeeper_quorum": quorum,
        "hadoop_hbase::deploy::auxiliary": False,
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, hbase_roles, site_overrides)
    bigtop.trigger_puppet()
def configure_ha(self, zk_units):
    """
    Switch Spark to ZooKeeper-based recovery mode in spark-env.sh.

    Lines mentioning SPARK_DAEMON_JAVA_OPTS are replaced with the recovery
    options, lines mentioning SPARK_MASTER_IP are replaced with a comment,
    and 'zookeepers.available' is set in unit data.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read. The
        units are used in the order given (no sorting).
    """
    zk_connect = ",".join(
        "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
        for zk in zk_units)

    daemon_opts = ('-Dspark.deploy.recoveryMode=ZOOKEEPER '
                   '-Dspark.deploy.zookeeper.url={}'.format(zk_connect))
    edits = {
        r'.*SPARK_DAEMON_JAVA_OPTS.*':
            'SPARK_DAEMON_JAVA_OPTS=\"{}\"'.format(daemon_opts),
        r'.*SPARK_MASTER_IP.*': '# SPARK_MASTER_IP',
    }
    utils.re_edit_in_place(
        self.dist_config.path('spark_conf') / 'spark-env.sh', edits)

    unitdata.kv().set('zookeepers.available', True)
    unitdata.kv().flush(True)
def configure(self, hosts, zk_units):
    """
    Configure HBase via Bigtop using the given hosts and zookeepers.

    :param hosts: Handed to ``Bigtop.render_site_yaml`` as-is.
    :param list zk_units: Zookeeper dicts with a 'host' key; the resolved,
        sorted IPs become the zookeeper quorum override.
    """
    zk_ips = [utils.resolve_private_address(zk['host']) for zk in zk_units]
    zk_ips.sort()

    override = {
        'hadoop_hbase::common_config::zookeeper_quorum': ",".join(zk_ips),
        'hadoop_hbase::deploy::auxiliary': False
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts, ['hbase-server', 'hbase-master', 'hbase-client'], override)
    bigtop.trigger_puppet()
def trigger_puppet(self):
    """
    Trigger Puppet to install the desired components.

    Before puppet runs, a Java 7 hostname-length workaround may add the
    short hostname to /etc/hosts. After puppet runs, JAVA_HOME is written
    to /etc/default/bigtop-utils.
    """
    java_version = unitdata.kv().get('java_version', '')
    if java_version.startswith('1.7.') and len(get_fqdn()) > 64:
        # java7 has a MAXHOSTNAMELEN of 64 chars, so it cannot do hostname
        # lookups on clouds with >64 char FQDNs (e.g., gce). Work around
        # this by putting the (hopefully short) hostname into /etc/hosts
        # so that it will (hopefully) be used instead
        # (see http://paste.ubuntu.com/16230171/).
        # NB: this must precede the puppet apply, which may call java
        # (e.g. to format the namenode) and would fail without this fix.
        facter_output = subprocess.check_output(['facter', 'hostname'])
        short_host = facter_output.strip().decode()
        private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        if short_host and private_ip:
            utils.update_kv_host(private_ip, short_host)
            utils.manage_etc_hosts()

    # puppet args are bigtop-version dependent
    modulepath = (
        '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"')
    if self.bigtop_version == '1.1.0':
        puppet_args = [
            '-d', modulepath, 'bigtop-deploy/puppet/manifests/site.pp']
    else:
        puppet_args = [
            '-d', '--parser=future', modulepath,
            'bigtop-deploy/puppet/manifests']

    # puppet apply runs from the root of the bigtop release source
    with chdir(self.bigtop_base):
        utils.run_as('root', 'puppet', 'apply', *puppet_args)

    # Do any post-puppet config on the generated config files.
    java_home_line = 'export JAVA_HOME={}'.format(java_home())
    utils.re_edit_in_place('/etc/default/bigtop-utils', {
        r'(# )?export JAVA_HOME.*': java_home_line,
    })
def trigger_puppet(self):
    """
    Trigger Puppet to install the desired components.

    Applies the Java 7 long-FQDN workaround when needed, runs
    ``puppet apply`` from ``self.bigtop_base`` with version-specific
    arguments, then sets JAVA_HOME in /etc/default/bigtop-utils.
    """
    java_version = unitdata.kv().get('java_version', '')
    needs_short_host = (
        java_version.startswith('1.7.') and len(get_fqdn()) > 64)
    if needs_short_host:
        # java7 has MAXHOSTNAMELEN of 64 char, so java cannot be relied on
        # for hostname lookups on clouds with >64 char FQDNs (e.g., gce).
        # Putting the (hopefully short) hostname into /etc/hosts should
        # make it the one that is used
        # (see http://paste.ubuntu.com/16230171/).
        # NB: do this before the puppet apply; puppet may call java (like
        # formatting the namenode), which fails without this fix.
        short_host = subprocess.check_output(
            ['facter', 'hostname']).strip().decode()
        private_ip = utils.resolve_private_address(
            hookenv.unit_private_ip())
        if short_host and private_ip:
            utils.update_kv_host(private_ip, short_host)
            utils.manage_etc_hosts()

    # puppet args are bigtop-version dependent: 1.1.0 applies site.pp
    # directly, other versions use the future parser on the manifests dir.
    is_legacy = self.bigtop_version == '1.1.0'
    puppet_args = ['-d']
    if not is_legacy:
        puppet_args.append('--parser=future')
    puppet_args.append(
        '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"')
    if is_legacy:
        puppet_args.append('bigtop-deploy/puppet/manifests/site.pp')
    else:
        puppet_args.append('bigtop-deploy/puppet/manifests')

    # puppet apply runs from the root of the bigtop release source
    with chdir(self.bigtop_base):
        utils.run_as('root', 'puppet', 'apply', *puppet_args)

    # Do any post-puppet config on the generated config files.
    utils.re_edit_in_place('/etc/default/bigtop-utils', {
        r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format(
            java_home()),
    })
def configure_hosts_file(self):
    """
    Register the unit's private-address for /etc/hosts so that Java can
    resolve the server's hostname to its real IP address, then rename the
    host to match.

    The hostname is the unit id with '/' replaced by '-'.
    """
    private_address = hookenv.unit_get('private-address')
    local_ip = utils.resolve_private_address(private_address)
    unit_hostname = hookenv.local_unit().replace('/', '-')

    utils.update_kv_hosts({local_ip: unit_hostname})
    utils.manage_etc_hosts()

    # Rename the host to the more meaningful unit-derived value. This is
    # required on some providers: the /etc/hosts entry must match
    # /etc/hostname lest Hadoop get confused about where certain things
    # should be run.
    hostname_path = Path('/etc/hostname')
    hostname_path.write_text(unit_hostname)
    check_call(['hostname', '-F', hostname_path])
def configure_kafka(self, zk_units):
    """
    Write the zookeeper connection string to Kafka's property files.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read. The
        resulting ip:port entries are sorted before being joined.
    """
    # ip:port data from the connected zookeepers
    endpoints = sorted(
        "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
        for zk in zk_units)
    zk_connect = ",".join(endpoints)

    # Consumer props are edited first, then server props.
    for props_name in ('consumer.properties', 'server.properties'):
        props_path = self.dist_config.path('kafka_conf') / props_name
        utils.re_edit_in_place(props_path, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })
def configure_ha(self, zk_units):
    """
    Enable ZooKeeper recovery mode for Spark daemons.

    Rewrites spark-env.sh (daemon java opts set, master IP line commented)
    and marks zookeepers as available in unit data.

    :param list zk_units: Zookeeper dicts; 'host' and 'port' are read, in
        the order supplied.
    """
    endpoints = []
    for zk in zk_units:
        zk_ip = utils.resolve_private_address(zk['host'])
        endpoints.append("%s:%s" % (zk_ip, zk['port']))
    zk_url = ",".join(endpoints)

    daemon_opts = ('-Dspark.deploy.recoveryMode=ZOOKEEPER '
                   '-Dspark.deploy.zookeeper.url={}'.format(zk_url))
    java_opts_line = 'SPARK_DAEMON_JAVA_OPTS=\"{}\"'.format(daemon_opts)

    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    utils.re_edit_in_place(
        spark_env, {
            r'.*SPARK_DAEMON_JAVA_OPTS.*': java_opts_line,
            r'.*SPARK_MASTER_IP.*': '# SPARK_MASTER_IP',
        })

    unitdata.kv().set('zookeepers.available', True)
    unitdata.kv().flush(True)
def check_reverse_dns(self):
    """
    Determine if reverse DNS lookups work on a machine.

    Some Hadoop services expect forward and reverse DNS to work, but not
    all clouds (eg, Azure) offer a working reverse-DNS environment. A
    machine with a domainname of 'localdomain' is also assumed to lack
    proper reverse DNS. The outcome is stored in unit data under
    'reverse_dns_ok' so it can be configured around later.

    NB: call this *before* any /etc/hosts changes since
    gethostbyaddr will not fail if we have an /etc/hosts entry.
    """
    def lookup_succeeds():
        # True unless the reverse lookup raises socket.herror.
        try:
            socket.gethostbyaddr(
                utils.resolve_private_address(hookenv.unit_private_ip()))
        except socket.herror:
            return False
        return True

    # The lookup is only attempted when not on 'localdomain'.
    reverse_dns_ok = not is_localdomain() and lookup_succeeds()
    unitdata.kv().set('reverse_dns_ok', reverse_dns_ok)
def check_reverse_dns(self):
    """
    Record in unit data whether reverse DNS works for this unit.

    Hadoop services may expect forward and reverse DNS to work. Some
    clouds (eg, Azure) lack reverse DNS, and any machine whose domainname
    is 'localdomain' is assumed to lack it as well. The boolean result is
    saved as 'reverse_dns_ok'.

    NB: call this *before* any /etc/hosts changes since
    gethostbyaddr will not fail if we have an /etc/hosts entry.
    """
    reverse_dns_ok = False
    if not is_localdomain():
        own_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        try:
            socket.gethostbyaddr(own_ip)
        except socket.herror:
            pass  # lookup failed; keep reverse_dns_ok False
        else:
            reverse_dns_ok = True
    unitdata.kv().set('reverse_dns_ok', reverse_dns_ok)
def configure_kafka(self):
    """
    Configure Kafka's zookeeper connection from the Zookeeper relation.

    If the relation has connected units and is ready, the sorted ip:port
    list is written to consumer.properties and server.properties.
    Otherwise Kafka is stopped.
    """
    zookeepers_usable = (
        Zookeeper().connected_units() and Zookeeper().is_ready())
    if not zookeepers_usable:
        # if we have no zookeepers, make sure kafka is stopped
        self.stop()
        return

    # Get ip:port data from our connected zookeepers
    endpoints = [
        "%s:%s" % (
            utils.resolve_private_address(data['private-address']),
            data['port'])
        for _unit, data in Zookeeper().filtered_data().items()
    ]
    endpoints.sort()
    zk_line = 'zookeeper.connect=%s' % ",".join(endpoints)

    conf_key = 'kafka_conf'
    consumer_props = self.dist_config.path(conf_key) / 'consumer.properties'
    utils.re_edit_in_place(
        consumer_props, {r'^zookeeper.connect=.*': zk_line})

    server_props = self.dist_config.path(conf_key) / 'server.properties'
    utils.re_edit_in_place(
        server_props, {r'^zookeeper.connect=.*': zk_line})
def register_connected_hosts(self):
    """
    Record an ip -> hostname pair for every unit in the unfiltered data.

    The hostname is the unit name with '/' replaced by '-'. The IP is the
    unit's resolved 'private-address'.
    """
    for unit_name, unit_data in self.unfiltered_data().items():
        utils.update_kv_host(
            utils.resolve_private_address(unit_data['private-address']),
            unit_name.replace('/', '-'))
def configure(self, available_hosts, zk_units, peers, extra_libs):
    """
    This is the core logic of setting up spark.

    :param dict available_hosts: Hosts that Spark should know about.
    :param list zk_units: List of Zookeeper dicts with host/port info.
    :param list peers: List of Spark peer tuples (unit name, IP).
    :param list extra_libs: List of extra lib paths for driver/executors.
    """
    def memory_setting(requested, warning):
        # Literal values pass through. Percentages are turned into
        # megabytes of total RAM in standalone/local modes, and fall back
        # to '1g' (with a warning) in any other mode.
        if not requested.endswith('%'):
            return requested
        if mode == 'standalone' or mode.startswith('local'):
            total_mb = host.get_total_ram() / 1024 / 1024
            fraction = float(requested.strip('%')) / 100
            return str(int(total_mb * fraction)) + 'm'
        hookenv.log(warning, level=hookenv.WARNING)
        return '1g'

    # Set KV based on connected applications
    unitdata.kv().set('zookeeper.units', zk_units)
    unitdata.kv().set('sparkpeer.units', peers)
    unitdata.kv().flush(True)

    # Get our config ready
    dc = self.dist_config
    mode = hookenv.config()['spark_execution_mode']
    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)
    req_driver_mem = hookenv.config()['driver_memory']
    req_executor_mem = hookenv.config()['executor_memory']
    events_scheme = 'hdfs://{}' if mode.startswith('yarn') else 'file://{}'
    spark_events = events_scheme.format(dc.path('spark_events'))

    # handle tuning options that may be set as percentages
    driver_mem = memory_setting(
        req_driver_mem,
        "driver_memory percentage in non-local mode. "
        "Using 1g default.")
    executor_mem = memory_setting(
        req_executor_mem,
        "executor_memory percentage in non-local mode. "
        "Using 1g default.")

    # Some spark applications look for envars in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
        env['SPARK_HOME'] = dc.path('spark_home')

    # Setup hosts dict
    hosts = {'spark': master_ip}
    for optional_role in ('namenode', 'resourcemanager'):
        if optional_role in available_hosts:
            hosts[optional_role] = available_hosts[optional_role]

    # Setup roles dict. The history server and client are always
    # included; other roles depend on the execution mode.
    roles = ['spark-history-server', 'spark-client']
    if mode == 'standalone':
        roles += ['spark-master', 'spark-worker']
    elif mode.startswith('yarn'):
        roles += ['spark-on-yarn', 'spark-yarn-slave']

    # Setup overrides dict
    override = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': spark_events,
        'spark::common::history_log_dir': spark_events,
        'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
        'spark::common::driver_mem': driver_mem,
        'spark::common::executor_mem': executor_mem,
    }
    if zk_units:
        zk_connect = ",".join(
            "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
            for zk in zk_units)
    else:
        zk_connect = None
    override['spark::common::zookeeper_connection_string'] = zk_connect

    # Create our site.yaml and trigger puppet.
    # NB: during an upgrade, the site.yaml is configured but puppet is not
    # triggered. The user must do that with the 'reinstall' action.
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    if unitdata.kv().get('spark.version.repo', False):
        hookenv.log(
            "An upgrade is available and the site.yaml has been "
            "configured. Run the 'reinstall' action to continue.",
            level=hookenv.INFO)
    else:
        bigtop.trigger_puppet()
        self.patch_worker_master_url(master_ip, master_url)

        # Packages don't create the event dir by default. Do it each time
        # spark is (re)installed to ensure location/perms are correct.
        self.configure_events_dir(mode)

    # Handle examples and Spark-Bench. Do this each time this method is
    # called in case we need to act on a new resource or user config.
    self.configure_examples()
    self.configure_sparkbench()
def is_master(self):
    """
    Report whether this unit is the current master.

    The local unit's private address is resolved and compared with the
    value returned by ``self.get_master_ip()``.

    :returns: result of comparing the two addresses for equality.
    """
    local_address = utils.resolve_private_address(hookenv.unit_private_ip())
    return local_address == self.get_master_ip()
def configure(self):
    '''
    Configure spark environment for all users.

    Resolves the driver/executor memory settings from charm config, then
    writes them (together with the master and the Spark paths) to
    /etc/environment, spark-defaults.conf and spark-env.sh. Finally
    installs or removes Spark-Bench according to the
    'spark_bench_enabled' config option.
    '''
    def _resolve_mem(requested, option):
        # Turn a config value into a concrete memory setting. Percentages
        # are only honoured when self.is_spark_local() is true; otherwise
        # we warn and fall back to the 1g default.
        if not requested.endswith('%'):
            return requested
        if not self.is_spark_local():
            hookenv.log(
                "{} percentage in non-local mode. Using 1g default.".format(
                    option),
                level=hookenv.WARNING)
            return '1g'
        mem_mb = host.get_total_ram() / 1024 / 1024
        req_percentage = float(requested.strip('%')) / 100
        return str(int(mem_mb * req_percentage)) + 'm'

    spark_home = self.dist_config.path('spark')
    spark_bin = spark_home / 'bin'

    # handle tuning options that may be set as percentages
    req_driver_mem = hookenv.config()['driver_memory']
    req_executor_mem = hookenv.config()['executor_memory']
    driver_mem = _resolve_mem(req_driver_mem, 'driver_memory')
    executor_mem = _resolve_mem(req_executor_mem, 'executor_memory')

    # update environment variables
    with utils.environment_edit_in_place('/etc/environment') as env:
        if spark_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], spark_bin])
        env['MASTER'] = self.get_master()
        env['PYSPARK_DRIVER_PYTHON'] = "ipython"
        env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
        env['SPARK_DRIVER_MEMORY'] = driver_mem
        env['SPARK_EXECUTOR_MEMORY'] = executor_mem
        env['SPARK_HOME'] = spark_home
        env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

    # update spark config
    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.master *.*': 'spark.master {}'.format(self.get_master()),
        r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled true',
        r'.*spark.eventLog.dir *.*': 'spark.eventLog.dir hdfs:///user/ubuntu/directory',
    })

    # spark-env.sh: SPARK_MASTER_IP is set to this unit's own address
    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    utils.re_edit_in_place(spark_env, {
        r'.*SPARK_DRIVER_MEMORY.*': 'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
        r'.*SPARK_EXECUTOR_MEMORY.*': 'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
        r'.*SPARK_LOG_DIR.*': 'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
        r'.*SPARK_MASTER_IP.*': 'SPARK_MASTER_IP={}'.format(local_ip),
        r'.*SPARK_WORKER_DIR.*': 'SPARK_WORKER_DIR={}'.format(self.dist_config.path('spark_work')),
    })

    # manage SparkBench
    install_sb = hookenv.config()['spark_bench_enabled']
    sb_dir = '/home/ubuntu/spark-bench'
    if install_sb:
        if utils.cpu_arch() == 'ppc64le':
            sb_url = hookenv.config()['spark_bench_ppc64le']
        else:
            # TODO: may need more arch cases (go with x86 sb for now)
            sb_url = hookenv.config()['spark_bench_x86_64']

        # Always start from a clean tree before unpacking the archive.
        Path(sb_dir).rmtree_p()
        fetcher = ArchiveUrlFetchHandler()
        fetcher.install(sb_url, '/home/ubuntu')

        # NOTE: SparkBench expects to live in ~/spark-bench. If we ever use
        # a .tgz that doesn't expand to sb_dir, glob for
        # '/home/ubuntu/spark-bench-*' and rename the result to sb_dir.

        # comment out mem tunings (let them come from /etc/environment)
        sb_env = Path(sb_dir) / 'conf/env.sh'
        utils.re_edit_in_place(sb_env, {
            r'^SPARK_DRIVER_MEMORY.*': '# SPARK_DRIVER_MEMORY (use value from environment)',
            r'^SPARK_EXECUTOR_MEMORY.*': '# SPARK_EXECUTOR_MEMORY (use value from environment)',
        })
    else:
        Path(sb_dir).rmtree_p()
def configure(self, available_hosts, zk_units, peers):
    """
    This is the core logic of setting up spark.

    Two conditions are inferred from ``available_hosts``:

      * 'namenode' present: HDFS is ready
      * 'resourcemanager' present: YARN is ready

    :param dict available_hosts: Hosts that Spark should know about. The
        'spark-master' key is required.
    :param zk_units: Zookeeper units, each indexed by 'host' and 'port'.
        A falsy value sets the zookeeper connection string to None.
    :param peers: Spark peer data, stored as-is in the unit KV store.
    """
    def _resolve_mem(requested, option):
        # Turn a config value into a concrete memory setting. Percentages
        # are only honoured in standalone/local modes; otherwise we warn
        # and fall back to the 1g default.
        if not requested.endswith('%'):
            return requested
        if mode == 'standalone' or mode.startswith('local'):
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(requested.strip('%')) / 100
            return str(int(mem_mb * req_percentage)) + 'm'
        hookenv.log(
            "{} percentage in non-local mode. Using 1g default.".format(
                option),
            level=hookenv.WARNING)
        return '1g'

    # Bootstrap spark
    if not unitdata.kv().get('spark.bootstrapped', False):
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    # Set KV based on connected applications
    unitdata.kv().set('zookeeper.units', zk_units)
    unitdata.kv().set('sparkpeer.units', peers)
    unitdata.kv().flush(True)

    # Get our config ready
    dc = self.dist_config
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    mode = hookenv.config()['spark_execution_mode']
    master_ip = utils.resolve_private_address(available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)

    # Setup hosts dict
    hosts = {
        'spark': master_ip,
    }
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
        # With a namenode available, the event log location comes from
        # setup_hdfs_logs() instead of the local file:// default.
        events_log_dir = self.setup_hdfs_logs()

    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    # Setup roles dict. We always include the history server and client.
    # Determine other roles based on our execution mode.
    roles = ['spark-history-server', 'spark-client']
    if mode == 'standalone':
        roles.append('spark-master')
        roles.append('spark-worker')
    elif mode.startswith('yarn'):
        roles.append('spark-on-yarn')
        roles.append('spark-yarn-slave')

    # Setup overrides dict
    override = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
    }
    if zk_units:
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))

        zk_connect = ",".join(zks)
        override['spark::common::zookeeper_connection_string'] = zk_connect
    else:
        override['spark::common::zookeeper_connection_string'] = None

    # Create our site.yaml and trigger puppet
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()

    # Do this after our puppet bits in case puppet overrides needed perms
    if 'namenode' not in available_hosts:
        # Local event dir (not in HDFS) needs to be 777 so non-spark
        # users can write job history there. It needs to be g+s so
        # all entries will be readable by spark (in the spark group).
        # It needs to be +t so users cannot remove files they don't own.
        dc.path('spark_events').chmod(0o3777)

    self.patch_worker_master_url(master_ip, master_url)

    # handle tuning options that may be set as percentages
    req_driver_mem = hookenv.config()['driver_memory']
    req_executor_mem = hookenv.config()['executor_memory']
    driver_mem = _resolve_mem(req_driver_mem, 'driver_memory')
    executor_mem = _resolve_mem(req_executor_mem, 'executor_memory')

    spark_env = '/etc/spark/conf/spark-env.sh'
    utils.re_edit_in_place(spark_env, {
        r'.*SPARK_DRIVER_MEMORY.*': 'export SPARK_DRIVER_MEMORY={}'.format(driver_mem),
        r'.*SPARK_EXECUTOR_MEMORY.*': 'export SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
    }, append_non_matches=True)

    # Install SB (subsequent calls will reconfigure existing install)
    # SparkBench looks for the spark master in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
    self.install_benchmark()
def configure(self, available_hosts, zk_units, peers, extra_libs):
    """
    Build the Spark site configuration and apply it.

    :param dict available_hosts: Hosts that Spark should know about.
    :param list zk_units: List of Zookeeper dicts with host/port info.
    :param list peers: List of Spark peer tuples (unit name, IP).
    :param list extra_libs: List of extra lib paths for driver/executors.
    """
    def _tuned_memory(requested, fallback_notice):
        # Plain values pass through; percentages are converted to megabytes
        # in standalone/local modes, otherwise the 1g default applies.
        if not requested.endswith('%'):
            return requested
        if mode == 'standalone' or mode.startswith('local'):
            total_mb = host.get_total_ram() / 1024 / 1024
            fraction = float(requested.strip('%')) / 100
            return str(int(total_mb * fraction)) + 'm'
        hookenv.log(fallback_notice, level=hookenv.WARNING)
        return '1g'

    # Remember which applications we are connected to.
    kv = unitdata.kv()
    kv.set('zookeeper.units', zk_units)
    kv.set('sparkpeer.units', peers)
    kv.flush(True)

    # Gather everything we need from config.
    dc = self.dist_config
    mode = hookenv.config()['spark_execution_mode']
    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)
    req_driver_mem = hookenv.config()['driver_memory']
    req_executor_mem = hookenv.config()['executor_memory']
    events_scheme = 'hdfs://{}' if mode.startswith('yarn') else 'file://{}'
    spark_events = events_scheme.format(dc.path('spark_events'))

    # Memory options may be expressed as percentages of total RAM.
    driver_mem = _tuned_memory(
        req_driver_mem,
        "driver_memory percentage in non-local mode. "
        "Using 1g default.")
    executor_mem = _tuned_memory(
        req_executor_mem,
        "executor_memory percentage in non-local mode. "
        "Using 1g default.")

    # Some spark applications look for envars in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
        env['SPARK_HOME'] = dc.path('spark_home')

    # Hosts: the master is always present, the hadoop hosts only if known.
    hosts = {'spark': master_ip}
    for hadoop_host in ('namenode', 'resourcemanager'):
        if hadoop_host in available_hosts:
            hosts[hadoop_host] = available_hosts[hadoop_host]

    # Roles: history server and client always, the rest depends on mode.
    roles = ['spark-history-server', 'spark-client']
    if mode == 'standalone':
        roles += ['spark-master', 'spark-worker']
    elif mode.startswith('yarn'):
        roles += ['spark-on-yarn', 'spark-yarn-slave']

    # Zookeeper connection string is "ip:port,ip:port,..." or None.
    if zk_units:
        zk_connect = ",".join([
            "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
            for zk in zk_units
        ])
    else:
        zk_connect = None

    override = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': spark_events,
        'spark::common::history_log_dir': spark_events,
        'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
        'spark::common::driver_mem': driver_mem,
        'spark::common::executor_mem': executor_mem,
        'spark::common::zookeeper_connection_string': zk_connect,
    }

    # Create our site.yaml and trigger puppet.
    # NB: during an upgrade, we configure the site.yaml, but do not
    # trigger puppet. The user must do that with the 'reinstall' action.
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    upgrade_pending = unitdata.kv().get('spark.version.repo', False)
    if not upgrade_pending:
        bigtop.trigger_puppet()
        self.patch_worker_master_url(master_ip, master_url)

        # Packages don't create the event dir by default. Do it each time
        # spark is (re)installed to ensure location/perms are correct.
        self.configure_events_dir(mode)
    else:
        hookenv.log("An upgrade is available and the site.yaml has been "
                    "configured. Run the 'reinstall' action to continue.",
                    level=hookenv.INFO)

    # Handle examples and Spark-Bench. Do this each time this method is
    # called in case we need to act on a new resource or user config.
    self.configure_examples()
    self.configure_sparkbench()
def register_connected_hosts(self):
    """
    Record a host entry for every unit returned by ``unfiltered_data()``.

    Each unit's 'private-address' is resolved and passed to
    ``utils.update_kv_host`` together with the unit name, in which '/'
    is replaced by '-'.
    """
    connected = self.unfiltered_data()
    for unit_name, unit_data in connected.items():
        address = utils.resolve_private_address(unit_data['private-address'])
        utils.update_kv_host(address, unit_name.replace('/', '-'))
def configure(self):
    '''
    Configure spark environment for all users.

    Steps, in order:

      1. resolve driver/executor memory from charm config
      2. export the Spark settings in /etc/environment
      3. edit spark-defaults.conf and spark-env.sh in place
      4. install (or remove) Spark-Bench per 'spark_bench_enabled'
    '''
    def _memory_setting(option):
        # Read `option` from charm config and return a concrete memory
        # value. Percentages are only honoured when self.is_spark_local()
        # is true; otherwise we warn and fall back to the 1g default.
        requested = hookenv.config()[option]
        if not requested.endswith('%'):
            return requested
        if self.is_spark_local():
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(requested.strip('%')) / 100
            return str(int(mem_mb * req_percentage)) + 'm'
        hookenv.log(
            "{} percentage in non-local mode. Using 1g default.".format(
                option),
            level=hookenv.WARNING)
        return '1g'

    spark_home = self.dist_config.path('spark')
    spark_bin = spark_home / 'bin'

    # handle tuning options that may be set as percentages
    driver_mem = _memory_setting('driver_memory')
    executor_mem = _memory_setting('executor_memory')

    # update environment variables
    with utils.environment_edit_in_place('/etc/environment') as env:
        if spark_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], spark_bin])
        env['MASTER'] = self.get_master()
        env['PYSPARK_DRIVER_PYTHON'] = "ipython"
        env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
        env['SPARK_DRIVER_MEMORY'] = driver_mem
        env['SPARK_EXECUTOR_MEMORY'] = executor_mem
        env['SPARK_HOME'] = spark_home
        env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

    # update spark config
    spark_conf = self.dist_config.path(
        'spark_conf') / 'spark-defaults.conf'
    utils.re_edit_in_place(
        spark_conf, {
            r'.*spark.master *.*':
            'spark.master {}'.format(self.get_master()),
            r'.*spark.eventLog.enabled *.*':
            'spark.eventLog.enabled true',
            r'.*spark.eventLog.dir *.*':
            'spark.eventLog.dir hdfs:///user/ubuntu/directory',
        })

    # spark-env.sh: SPARK_MASTER_IP is set to this unit's own address
    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    utils.re_edit_in_place(
        spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*':
            'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*':
            'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
            r'.*SPARK_LOG_DIR.*':
            'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
            r'.*SPARK_MASTER_IP.*':
            'SPARK_MASTER_IP={}'.format(local_ip),
            r'.*SPARK_WORKER_DIR.*':
            'SPARK_WORKER_DIR={}'.format(
                self.dist_config.path('spark_work')),
        })

    # manage SparkBench
    install_sb = hookenv.config()['spark_bench_enabled']
    sb_dir = '/home/ubuntu/spark-bench'
    if install_sb:
        if utils.cpu_arch() == 'ppc64le':
            sb_url = hookenv.config()['spark_bench_ppc64le']
        else:
            # TODO: may need more arch cases (go with x86 sb for now)
            sb_url = hookenv.config()['spark_bench_x86_64']

        # Remove any previous install so the archive unpacks cleanly.
        Path(sb_dir).rmtree_p()
        fetcher = ArchiveUrlFetchHandler()
        fetcher.install(sb_url, '/home/ubuntu')

        # NOTE: SparkBench expects to live in ~/spark-bench. If we ever use
        # a .tgz that doesn't expand to sb_dir, glob for
        # '/home/ubuntu/spark-bench-*' and rename the result to sb_dir.

        # comment out mem tunings (let them come from /etc/environment)
        sb_env = Path(sb_dir) / 'conf/env.sh'
        utils.re_edit_in_place(
            sb_env, {
                r'^SPARK_DRIVER_MEMORY.*':
                '# SPARK_DRIVER_MEMORY (use value from environment)',
                r'^SPARK_EXECUTOR_MEMORY.*':
                '# SPARK_EXECUTOR_MEMORY (use value from environment)',
            })
    else:
        Path(sb_dir).rmtree_p()
def setup_kafka_config(self):
    '''
    Copy the default configuration files to the kafka_conf location
    defined in dist.yaml, then configure the environment, the kafka
    property files, the init service template and /etc/hosts.
    '''
    dc = self.dist_config

    # Seed our conf dir from the config shipped with kafka.
    shipped_conf = dc.path('kafka') / 'config'
    real_conf = dc.path('kafka_conf')
    real_conf.rmtree_p()
    shipped_conf.copytree(real_conf)
    # Kafka has been seen to still look for config in KAFKA_HOME/config,
    # so replace the shipped dir with a symlink to our real conf dir.
    shipped_conf.rmtree_p()
    real_conf.symlink(shipped_conf)

    # Kafka has likewise been seen to write to KAFKA_HOME/logs regardless
    # of LOG_DIR, so symlink that location as well.
    shipped_logs = dc.path('kafka') / 'logs'
    app_logs = dc.path('kafka_app_logs')
    shipped_logs.rmtree_p()
    app_logs.symlink(shipped_logs)

    # Configure environment
    bin_dir = dc.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if bin_dir not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], bin_dir])
        env['LOG_DIR'] = dc.path('kafka_app_logs')

    # Configure server.properties
    # note: advertised.host.name is set to the public address so that
    # external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_address = hookenv.unit_get('private-address')
    private_ip = utils.resolve_private_address(private_address)
    port = dc.port('kafka')
    server_properties = dc.path('kafka_conf') / 'server.properties'
    _service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    server_edits = {
        r'^broker.id=.*': 'broker.id=%s' % unit_num,
        r'^port=.*': 'port=%s' % port,
        r'^log.dirs=.*': 'log.dirs=%s' % dc.path('kafka_data_logs'),
        r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
    }
    utils.re_edit_in_place(server_properties, server_edits)

    # Configure producer.properties
    # note: the broker list host is also the public address so external
    # (non-Juju) clients can connect; it must match advertised.host.name.
    producer_properties = dc.path('kafka_conf') / 'producer.properties'
    producer_edits = {
        r'^#?metadata.broker.list=.*': 'metadata.broker.list=%s:%s' % (public_address, port),
    }
    utils.re_edit_in_place(producer_properties, producer_edits)

    # Configure log properties
    log4j_properties = dc.path('kafka_conf') / 'log4j.properties'
    log4j_edits = {
        r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % dc.path('kafka_app_logs'),
    }
    utils.re_edit_in_place(log4j_properties, log4j_edits)

    # Pick the service template matching the init system in use.
    if host.init_is_systemd():
        template_name = 'systemd.conf'
        template_path = '/etc/systemd/system/kafka.service'
    else:
        template_name = 'upstart.conf'
        template_path = '/etc/init/kafka.conf'

    service_context = {
        'kafka_conf': dc.path('kafka_conf'),
        'kafka_bin': '{}/bin'.format(dc.path('kafka'))
    }
    templating.render(template_name, template_path, context=service_context)

    # fix for lxc containers and some corner cases in manual provider:
    # make the public address resolvable internally by mapping it to the
    # private IP
    utils.update_kv_host(private_ip, public_address)
    utils.manage_etc_hosts()
def is_master(self):
    """Compare this unit's resolved private IP with the master IP.

    :returns: result of the equality comparison between the two addresses.
    """
    my_address = hookenv.unit_private_ip()
    my_ip = utils.resolve_private_address(my_address)
    elected_ip = self.get_master_ip()
    return my_ip == elected_ip
def am_i_registered(self):
    """
    Check whether the related unit lists this unit in its 'etc_hosts' data.

    The 'etc_hosts' value of a ready unit on ``self.relation_name`` is
    parsed as JSON. This unit counts as registered when its resolved
    private IP maps to its unit name (with '/' replaced by '-').

    :returns: result of comparing the recorded name with the expected one.
    """
    private_address = hookenv.unit_get('private-address')
    local_ip = utils.resolve_private_address(private_address)
    expected_name = hookenv.local_unit().replace('/', '-')

    _unit, relation_data = any_ready_unit(self.relation_name)
    if not relation_data:
        # No (or empty) relation data: behave as if no hosts are recorded.
        relation_data = {}
    known_hosts = json.loads(relation_data.get('etc_hosts', '{}'))
    return known_hosts.get(local_ip, None) == expected_name
def configure(self, available_hosts, zk_units, peers, extra_libs):
    """
    This is the core logic of setting up spark.

    :param dict available_hosts: Hosts that Spark should know about. The
        'spark-master' key is required; 'namenode' and 'resourcemanager'
        are used when present.
    :param list zk_units: List of Zookeeper dicts with host/port info.
    :param list peers: List of Spark peer tuples (unit name, IP).
    :param list extra_libs: List of extra lib paths for driver/executors.
    """
    def _resolve_mem(requested, option):
        # Turn a config value into a concrete memory setting. Percentages
        # are only honoured in standalone/local modes; otherwise we warn
        # and fall back to the 1g default.
        if not requested.endswith('%'):
            return requested
        if mode == 'standalone' or mode.startswith('local'):
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(requested.strip('%')) / 100
            return str(int(mem_mb * req_percentage)) + 'm'
        hookenv.log(
            "{} percentage in non-local mode. Using 1g default.".format(
                option),
            level=hookenv.WARNING)
        return '1g'

    # Bootstrap spark
    if not unitdata.kv().get('spark.bootstrapped', False):
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    # Set KV based on connected applications
    unitdata.kv().set('zookeeper.units', zk_units)
    unitdata.kv().set('sparkpeer.units', peers)
    unitdata.kv().flush(True)

    # Get our config ready
    dc = self.dist_config
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    mode = hookenv.config()['spark_execution_mode']
    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)
    req_driver_mem = hookenv.config()['driver_memory']
    req_executor_mem = hookenv.config()['executor_memory']

    # handle tuning options that may be set as percentages
    driver_mem = _resolve_mem(req_driver_mem, 'driver_memory')
    executor_mem = _resolve_mem(req_executor_mem, 'executor_memory')

    # Setup hosts dict
    hosts = {
        'spark': master_ip,
    }
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
        # With a namenode available, the event log location comes from
        # setup_hdfs_logs() instead of the local file:// default.
        events_log_dir = self.setup_hdfs_logs()
    else:
        # Bigtop includes a default hadoop_head_node if we do not specify
        # any namenode info. To ensure spark standalone doesn't get
        # invalid hadoop config, set our NN to an empty string.
        hosts['namenode'] = ''
    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    # Setup roles dict. We always include the history server and client.
    # Determine other roles based on our execution mode.
    roles = ['spark-history-server', 'spark-client']
    if mode == 'standalone':
        roles.append('spark-master')
        roles.append('spark-worker')
    elif mode.startswith('yarn'):
        roles.append('spark-on-yarn')
        roles.append('spark-yarn-slave')

    # Setup overrides dict
    override = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
        'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
        'spark::common::driver_mem': driver_mem,
        'spark::common::executor_mem': executor_mem,
    }
    if zk_units:
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))

        zk_connect = ",".join(zks)
        override['spark::common::zookeeper_connection_string'] = zk_connect
    else:
        override['spark::common::zookeeper_connection_string'] = None

    # Create our site.yaml and trigger puppet
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()

    # Do this after our puppet bits in case puppet overrides needed perms
    if 'namenode' not in available_hosts:
        # Local event dir (not in HDFS) needs to be 777 so non-spark
        # users can write job history there. It needs to be g+s so
        # all entries will be readable by spark (in the spark group).
        # It needs to be +t so users cannot remove files they don't own.
        dc.path('spark_events').chmod(0o3777)

    self.patch_worker_master_url(master_ip, master_url)

    # Install SB (subsequent calls will reconfigure existing install)
    # SparkBench looks for the spark master in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
    self.install_benchmark()
def configure(self, available_hosts, zk_units, peers):
    """
    Core logic of setting up spark.

    Two conditions are inferred from ``available_hosts``:

      * 'namenode' present: HDFS is there
      * 'resourcemanager' present: YARN is ready

    :param dict available_hosts: Hosts that Spark should know about.
    :param zk_units: Zookeeper units, each indexed by 'host' and 'port'.
    :param peers: Spark peer data, stored as-is in the unit KV store.
    """
    # Persist the connected applications before anything else.
    kv = unitdata.kv()
    kv.set('zookeeper.units', zk_units)
    kv.set('sparkpeer.units', peers)
    kv.flush(True)

    # One-time bootstrap.
    already_bootstrapped = unitdata.kv().get('spark.bootstrapped', False)
    if not already_bootstrapped:
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    hosts = {'spark': master_ip}

    dc = self.dist_config
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    hdfs_ready = 'namenode' in available_hosts
    if hdfs_ready:
        hosts['namenode'] = available_hosts['namenode']
        events_log_dir = self.setup_hdfs_logs()

    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    roles = self.get_roles()

    override = {
        'spark::common::master_url': self.get_master_url(master_ip),
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
    }

    # Zookeeper connection string is "ip:port,ip:port,..." or empty.
    if zk_units:
        endpoints = [
            "%s:%s" % (utils.resolve_private_address(zk['host']), zk['port'])
            for zk in zk_units
        ]
        zk_connect = ",".join(endpoints)
    else:
        zk_connect = ""
    override['spark::common::zookeeper_connection_string'] = zk_connect

    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()
    # There is a race condition here.
    # The worker role will not start the first time we trigger puppet apply.
    # The exception in /var/logs/spark:
    # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
    # The master url is not set at the time the worker starts the first time.
    # TODO(kjackal): ...do the needed... (investigate, debug, submit patch)
    bigtop.trigger_puppet()

    if not hdfs_ready:
        # Local event dir (not in HDFS) needs to be 777 so non-spark
        # users can write job history there. It needs to be g+s so
        # all entries will be readable by spark (in the spark group).
        # It needs to be +t so users cannot remove files they don't own.
        dc.path('spark_events').chmod(0o3777)

    self.patch_worker_master_url(master_ip)

    # SparkBench looks for the spark master in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = self.get_master_url(master_ip)
    # Install SB (subsequent calls will reconfigure existing install)
    self.install_benchmark()