def install_spark_yarn():
    """
    Configure Spark for 'yarn-*' deployment mode.

    Runs after Juju has elected a leader; the 'hadoop.yarn.ready' state
    must be set before calling.
    """
    master_fqdn = leadership.leader_get('master-fqdn')
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))

    hosts = {
        'spark-master': master_fqdn,
        'resourcemanager': hadoop.resourcemanagers()[0],
    }
    # yarn.ready implies hdfs.ready for us, so this check is likely
    # redundant -- but it is harmless to keep.
    if is_state('hadoop.hdfs.ready'):
        hosts['namenode'] = hadoop.namenodes()[0]

    # Native hadoop libs always ride along in yarn mode; cuda libs are
    # added only when the cuda state reports them installed.
    lib_paths = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        lib_paths.append("/usr/local/cuda/lib64")

    Spark().configure(hosts, zk_units=None, peers=None, extra_libs=lib_paths)
    set_deployment_mode_state('spark.yarn.installed')
def install_spark_standalone(zks, peers):
    """
    Configure Spark for local/standalone deployment mode.

    Runs after Juju has elected a leader.

    :param zks: zookeeper units (falsy when no ensemble is related)
    :param peers: spark peer units
    """
    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
    }

    # When the zk ensemble has changed and we are not handling a departed
    # spark peer, pause so the ensemble can settle. Starting spark master
    # with data from the wrong zk leader makes it shut itself down:
    # https://issues.apache.org/jira/browse/SPARK-15544
    ensemble_changed = bool(zks) and data_changed('zks', zks)
    if ensemble_changed and not is_state('sparkpeers.departed'):
        hookenv.status_set('maintenance',
                           'waiting for zookeeper ensemble to settle')
        hookenv.log(
            "Waiting 2m to ensure zk ensemble has settled: {}".format(zks))
        time.sleep(120)

    # Spark packages prereq hadoop even in standalone mode, so the native
    # hadoop libs are always on disk; include cuda libs when present.
    lib_paths = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        lib_paths.append("/usr/local/cuda/lib64")

    Spark().configure(hosts, zk_units=zks, peers=peers, extra_libs=lib_paths)
    set_deployment_mode_state('spark.standalone.installed')
def install_spark_standalone(zks, peers):
    """
    Configure Spark for local/standalone deployment mode.

    Runs after Juju has elected a leader.

    :param zks: zookeeper units (falsy when no ensemble is related)
    :param peers: spark peer units
    """
    master = leadership.leader_get('master-fqdn')
    hosts = {
        'spark-master': master,
    }

    # A changed zk ensemble needs time to settle unless we are merely
    # handling a departing spark peer. Starting spark master against the
    # wrong zk leader causes it to shut down:
    # https://issues.apache.org/jira/browse/SPARK-15544
    if zks and data_changed('zks', zks):
        if not is_state('sparkpeers.departed'):
            hookenv.status_set('maintenance',
                               'waiting for zookeeper ensemble to settle')
            hookenv.log("Waiting 2m to ensure zk ensemble has settled: {}".format(zks))
            time.sleep(120)

    # Spark packages prereq hadoop even in standalone mode, so the native
    # hadoop libs are always available; cuda libs are optional extras.
    lib_paths = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        lib_paths.append("/usr/local/cuda/lib64")

    spark = Spark()
    spark.configure(hosts, zk_units=zks, peers=peers, extra_libs=lib_paths)
    set_deployment_mode_state('spark.standalone.installed')
def install_spark(hadoop=None):
    """
    Configure and install Spark for the current deployment mode.

    :param hadoop: hadoop relation object providing resourcemanagers()
        and namenodes(); required whenever 'hadoop.yarn.ready' or
        'hadoop.hdfs.ready' is set.
    :raises ValueError: if a hadoop ready state is set but no hadoop
        relation object was supplied.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    hosts = {
        'spark-master': spark_master_host,
    }

    yarn_ready = is_state('hadoop.yarn.ready')
    hdfs_ready = is_state('hadoop.hdfs.ready')
    # Fail loudly with a clear message instead of the opaque
    # AttributeError that dereferencing None would otherwise raise.
    if (yarn_ready or hdfs_ready) and hadoop is None:
        raise ValueError(
            'hadoop relation object is required when yarn/hdfs is ready')

    if yarn_ready:
        rms = hadoop.resourcemanagers()
        hosts['resourcemanager'] = rms[0]
    if hdfs_ready:
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    dist = get_dist_config()
    spark = Spark(dist)
    spark.configure(hosts)
def install_spark(hadoop=None, zks=None):
    """
    Configure Spark for the current deployment mode.

    Returns True on success; returns False (after setting a 'waiting'
    status) when no master has been elected yet.

    :param hadoop: hadoop relation object providing resourcemanagers()
        and namenodes(); consulted only when the ready states are set.
    :param zks: zookeeper units to pass through to Spark.configure.
    """
    master = leadership.leader_get('master-fqdn')
    if not master:
        hookenv.status_set('waiting', 'master not elected yet')
        return False

    hosts = {'spark-master': master}
    if is_state('hadoop.yarn.ready'):
        hosts['resourcemanager'] = hadoop.resourcemanagers()[0]
    if is_state('hadoop.hdfs.ready'):
        hosts['namenode'] = hadoop.namenodes()[0]

    Spark().configure(hosts, zks, get_spark_peers())
    return True
def install_spark_yarn():
    """
    Configure Spark for 'yarn-*' deployment mode.

    Runs after Juju has elected a leader; the 'hadoop.yarn.ready' state
    must be set before calling.
    """
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))

    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
        'resourcemanager': hadoop.resourcemanagers()[0],
    }
    # yarn.ready implies hdfs.ready for us, so this check is likely
    # redundant -- but it is harmless to keep.
    if is_state('hadoop.hdfs.ready'):
        hosts['namenode'] = hadoop.namenodes()[0]

    Spark().configure(hosts, zk_units=None, peers=None)
    set_deployment_mode_state('spark.yarn.installed')
def install_spark_standalone(zks, peers):
    """
    Configure Spark for local/standalone deployment mode.

    Runs after Juju has elected a leader.

    :param zks: zookeeper units (falsy when no ensemble is related)
    :param peers: spark peer units
    """
    master = leadership.leader_get('master-fqdn')
    hosts = {
        'spark-master': master,
    }

    # A changed zk ensemble needs time to settle unless we are merely
    # handling a departing spark peer. Starting spark master against the
    # wrong zk leader causes it to shut down:
    # https://issues.apache.org/jira/browse/SPARK-15544
    ensemble_changed = bool(zks) and data_changed('zks', zks)
    if ensemble_changed and not is_state('sparkpeers.departed'):
        hookenv.status_set('maintenance',
                           'waiting for zookeeper ensemble to settle')
        hookenv.log("Waiting 2m to ensure zk ensemble has settled: {}".format(zks))
        time.sleep(120)

    Spark().configure(hosts, zks, peers)
    set_deployment_mode_state('spark.standalone.installed')