def install(self):
    '''
    Fetch the TomEE resource, unpack it, and set CATALINA_HOME.
    '''
    self.dist_config.add_users()
    self.dist_config.add_dirs()
    result = resource_get('tomee')
    if not result:
        log("Failed to fetch TomEE resource")
        return False
    unitdata.kv().set("tomeetarball", result)
    log("TomEE tarball path is {}".format(result))
    tomee_install_dir = self.dist_config.path('tomee_dir')
    with chdir(tomee_install_dir):
        utils.run_as('tomcat', 'tar', '-zxvf', '{}'.format(result))
    tomee_dirs = [f for f in os.listdir(tomee_install_dir)
                  if f.startswith('apache-tomee')]
    catalina_home = os.path.join(tomee_install_dir, tomee_dirs[0])
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['CATALINA_HOME'] = catalina_home
    unitdata.kv().set("catalina_home", catalina_home)
    self.open_ports()
    return True
def setup_init_scripts(self):
    if host.init_is_systemd():
        template_path = '/etc/systemd/system/zeppelin.service'
        template_name = 'systemd.conf'
    else:
        template_path = '/etc/init/zeppelin.conf'
        template_name = 'upstart.conf'
    if os.path.exists(template_path):
        template_path_backup = "{}.backup".format(template_path)
        if os.path.exists(template_path_backup):
            os.remove(template_path_backup)
        os.rename(template_path, template_path_backup)

    render(
        template_name,
        template_path,
        context={
            'zeppelin_home': self.dist_config.path('zeppelin'),
            'zeppelin_conf': self.dist_config.path('zeppelin_conf')
        },
    )

    if host.init_is_systemd():
        utils.run_as('root', 'systemctl', 'enable', 'zeppelin.service')
        utils.run_as('root', 'systemctl', 'daemon-reload')
def configure_remote_db(self, mysql):
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    jdbc_url = \
        "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
            mysql.host(), mysql.port(), mysql.database()
        )
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = jdbc_url
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = \
            "com.mysql.jdbc.Driver"

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
    })

    # Now that we have db connection info, init our schema (only once)
    remote_db = hookenv.remote_service_name()
    if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
        tool_path = "{}/bin/schematool".format(self.dist_config.path('hive'))
        utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
        unitdata.kv().flush(True)
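# A minimal sketch of the JDBC URL rendered above, with hypothetical
# relation values (host 10.0.0.5, port 3306, database 'hive'):
jdbc_url = "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
    '10.0.0.5', 3306, 'hive')
assert jdbc_url == "jdbc:mysql://10.0.0.5:3306/hive?createDatabaseIfNotExist=true"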
def configure(self):
    '''
    Configure spark environment for all users
    '''
    spark_home = self.dist_config.path('spark')
    spark_bin = spark_home / 'bin'

    # put our jar in hdfs
    spark_assembly_jar = glob(
        '{}/lib/spark-assembly-*.jar'.format(spark_home))[0]
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/share/lib')
    try:
        utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                     '/user/ubuntu/share/lib/spark-assembly.jar')
    except CalledProcessError:
        print("File exists")

    # update environment variables
    with utils.environment_edit_in_place('/etc/environment') as env:
        if spark_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], spark_bin])
        env['MASTER'] = hookenv.config('spark_execution_mode')
        env['PYSPARK_DRIVER_PYTHON'] = "ipython"
        env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
        env['SPARK_HOME'] = spark_home
        env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

    # update spark config
    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled true',
        r'.*spark.eventLog.dir *.*':
            'spark.eventLog.dir hdfs:///user/ubuntu/directory',
    })
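# A minimal re.sub sketch of the edit above (assumed semantics of
# utils.re_edit_in_place: rewrite any line matching each pattern):
import re
line = '# spark.eventLog.enabled  false'
assert re.sub(r'.*spark.eventLog.enabled *.*',
              'spark.eventLog.enabled true',
              line) == 'spark.eventLog.enabled true'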
def setup_hive_config(self):
    '''
    Copy the default configuration files to the hive_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('hive') / 'conf'
    hive_conf = self.dist_config.path('hive_conf')
    hive_conf.rmtree_p()
    default_conf.copytree(hive_conf)

    # Configure immutable bits
    hive_bin = self.dist_config.path('hive') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if hive_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hive_bin])
        env['HIVE_CONF_DIR'] = self.dist_config.path('hive_conf')

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    if not hive_env.exists():
        (self.dist_config.path('hive_conf') /
         'hive-env.sh.template').copy(hive_env)

    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    if not hive_site.exists():
        (self.dist_config.path('hive_conf') /
         'hive-default.xml.template').copy(hive_site)
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        # TODO (kwm): we should be able to export java.io.tmpdir so these
        # four aren't needed
        props['hive.exec.local.scratchdir'] = "/tmp/hive"
        props['hive.downloaded.resources.dir'] = "/tmp/hive_resources"
        props['hive.querylog.location'] = "/tmp/hive"
        props['hive.server2.logging.operation.log.location'] = "/tmp/hive"

    # create hdfs storage space
    utils.run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/hive/warehouse')
def stop(self): hookenv.log("Stopping HUE and Supervisor process") try: utils.run_as('hue', 'pkill', '-9', 'supervisor') utils.run_as('hue', 'pkill', '-9', 'hue') except subprocess.CalledProcessError: return
def configure_hive(self, mysql):
    config = hookenv.config()
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = \
            "jdbc:mysql://{}:{}/{}".format(
                mysql.host(), mysql.port(), mysql.database())
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = \
            "com.mysql.jdbc.Driver"
        props['hive.hwi.war.file'] = \
            "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HADOOP_HEAPSIZE *=.*':
            'export HADOOP_HEAPSIZE=%s' % config['heap'],
        r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
    })

    # Now that we have db connection info, init our schema (only once)
    if not unitdata.kv().get('hive.schema.initialized'):
        utils.run_as('hive', 'schematool', '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized', True)
def stop(self):
    zeppelin_conf = self.dist_config.path('zeppelin_conf')
    zeppelin_home = self.dist_config.path('zeppelin')
    # TODO: try/catch existence of zeppelin-daemon.sh. Stop hook will fail
    # if we try to destroy a deployment that didn't finish installing.
    utils.run_as('ubuntu',
                 '{}/bin/zeppelin-daemon.sh'.format(zeppelin_home),
                 '--config', zeppelin_conf,
                 'stop')
def trigger_puppet(self):
    # TODO: need to either manage the apt keys from Juju or update
    # upstream Puppet recipes to install them along with the apt source.
    # puppet apply needs to be run from where the recipes were unpacked.
    with chdir("{0}/{1}".format(self.bigtop_dir, self.bigtop_version)):
        utils.run_as(
            'root', 'puppet', 'apply', '-d',
            '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
            'bigtop-deploy/puppet/manifests/site.pp')
def soft_restart(self):
    hookenv.log("Restarting HUE with Supervisor process")
    try:
        utils.run_as('hue', 'pkill', '-9', 'hue')
    except subprocess.CalledProcessError:
        hookenv.log("Problem with Supervisor process, doing hard HUE restart")
        self.stop()
        self.start()
def init_zkrest(self):
    # The zookeeper user needs to compile the REST contrib server, so
    # zookeeper needs to:
    #   1. have a home dir for the ant cache to exist
    #   2. be able to write to /usr/lib/zookeeper
    chownr(self.dist_config.path('zookeeper'), 'zookeeper', 'zookeeper',
           chowntopdir=True)
    with chdir(self.dist_config.path('zookeeper')):
        utils.run_as('zookeeper', 'ant')
    unitdata.kv().set('rest.initialised', True)
def setup_hdfs_logs(self):
    # create hdfs storage space for the history server
    dc = self.dist_config
    events_dir = dc.path('spark_events')
    events_dir = 'hdfs://{}'.format(events_dir)
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:spark',
                 events_dir)
    return events_dir
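# Path-to-URL sketch for the format() call above, using a hypothetical
# dist.yaml value of /var/log/spark-events:
assert 'hdfs://{}'.format('/var/log/spark-events') == \
    'hdfs:///var/log/spark-events'  # scheme plus absolute path, default namenode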
def install_puppet_modules(self):
    # Install required modules
    charm_dir = Path(hookenv.charm_dir())
    for module in sorted(glob('resources/puppet-modules/*.tar.gz')):
        # Required modules are included in the charms to support network
        # restricted deployment. Using force install / ignore deps prevents
        # puppet from calling out to https://forgeapi.puppetlabs.com.
        utils.run_as('root', 'puppet', 'module', 'install', '--force',
                     '--ignore-dependencies', charm_dir / module)
def configure_hadoop(self):
    # create hdfs storage space
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/zeppelin')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', 'zeppelin',
                 '/user/zeppelin')

    # If spark is ready, let it handle the spark_master_url. Otherwise,
    # zepp is in local mode; set it to yarn-client since hadoop is here.
    if not is_state('spark.ready'):
        self._add_override('zeppelin::server::spark_master_url',
                           'yarn-client')
    self.trigger_bigtop()
def deploy(self):
    '''
    Install the two deb packages. An exception is raised if
    installation fails.
    '''
    udedeb = unitdata.kv().get("udedeb")
    utils.run_as('root', 'dpkg', '-i', '{}'.format(udedeb))
    mariadeb = unitdata.kv().get("mariadeb")
    utils.run_as('root', 'dpkg', '-i', '{}'.format(mariadeb))
def start(self):
    # Start if we're not already running. We currently don't have any
    # runtime config options, so no need to restart when hooks fire.
    if not utils.jps("livy"):
        livy_log = self.dist_config.path("livy_logs") / "livy-server.log"
        livy_home = self.dist_config.path("livy")
        # chdir here because things like the zepp tutorial think
        # ZEPPELIN_HOME is wherever the daemon was started from.
        os.chdir(livy_home)
        # NB: '2>&1' and '&' are shell syntax; they are passed through as
        # literal arguments unless run_as invokes a shell.
        utils.run_as("hue", "./bin/livy-server", "2>&1", livy_log, "&")
def start(self):
    # Start if we're not already running. We currently don't have any
    # runtime config options, so no need to restart when hooks fire.
    if not utils.jps("zeppelin"):
        zeppelin_conf = self.dist_config.path('zeppelin_conf')
        zeppelin_home = self.dist_config.path('zeppelin')
        # chdir here because things like the zepp tutorial think
        # ZEPPELIN_HOME is wherever the daemon was started from.
        os.chdir(zeppelin_home)
        utils.run_as('ubuntu',
                     '{}/bin/zeppelin-daemon.sh'.format(zeppelin_home),
                     '--config', zeppelin_conf,
                     'start')
def configure(self, available_hosts):
    """
    This is the core logic of setting up spark.

    Two flags are needed:
      * Namenode exists aka HDFS is there
      * Resource manager exists aka YARN is ready

    Both flags are inferred from the available hosts.

    :param dict available_hosts: Hosts that Spark should know about.
    """
    if not unitdata.kv().get('spark.bootstrapped', False):
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    self.install_benchmark()

    hosts = {
        'spark': available_hosts['spark-master'],
    }

    dc = self.dist_config
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
        events_log_dir = self.setup_hdfs_logs()

    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    roles = self.get_roles()

    override = {
        'spark::common::master_url': self.get_master_url(
            available_hosts['spark-master']),
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()

    # There is a race condition here: the worker role will not start the
    # first time we trigger puppet apply, because the master url is not
    # yet set when the workers first come up. The exception in
    # /var/log/spark:
    #   Exception in thread "main" org.apache.spark.SparkException:
    #   Invalid master URL: spark://:7077
    # TODO(kjackal): ...do the needed... (investigate, debug, submit patch)
    bigtop.trigger_puppet()
    if 'namenode' not in available_hosts:
        # Make sure users other than spark can access the events logs dir
        # and run jobs
        utils.run_as('root', 'chmod', '777', dc.path('spark_events'))
def configure_hadoop(self):
    # create hdfs storage space
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/zeppelin')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', 'zeppelin',
                 '/user/zeppelin')

    # If spark is ready, let configure_spark() trigger bigtop. Otherwise,
    # put our spark in yarn-client mode since hadoop is here.
    if not is_state('spark.ready'):
        self._add_override('spark::common::master_url', 'yarn-client')
        self._add_override('zeppelin::server::spark_master_url',
                           'yarn-client')
        self.trigger_bigtop()
def run_smoke_tests(self, smoke_components=None, smoke_env=None):
    """
    Run the Bigtop smoke tests for given components using the gradle
    wrapper script.

    :param list smoke_components: Bigtop components to smoke test
    :param dict smoke_env: Dict of required environment variables (merged
        with /etc/environment)
    """
    if not is_state('bigtop.available'):
        hookenv.log('Bigtop is not ready to run smoke tests')
        return None
    if not smoke_components:
        hookenv.log('Missing Bigtop smoke test component list')
        return None

    # We always need TERM and JAVA_HOME; merge with any user provided dict
    subprocess_env = {'TERM': 'dumb', 'JAVA_HOME': java_home()}
    if isinstance(smoke_env, dict):
        subprocess_env.update(smoke_env)

    # Ensure the base dir is owned by ubuntu so we can create a .gradle dir.
    chownr(self.bigtop_base, 'ubuntu', 'ubuntu', chowntopdir=True)

    # Gradle doesn't honor env proxies; check for either http* or HTTP* and
    # set cli args as needed.
    http_url = os.environ.get('http_proxy', os.environ.get('HTTP_PROXY'))
    https_url = os.environ.get('https_proxy', os.environ.get('HTTPS_PROXY'))
    proxy_args = []
    if http_url:
        parsed_url = urlparse(http_url)
        proxy_args += ['-Dhttp.proxyHost={}'.format(parsed_url.hostname),
                       '-Dhttp.proxyPort={}'.format(parsed_url.port)]
    if https_url:
        parsed_url = urlparse(https_url)
        proxy_args += ['-Dhttps.proxyHost={}'.format(parsed_url.hostname),
                       '-Dhttps.proxyPort={}'.format(parsed_url.port)]

    # Bigtop can run multiple smoke tests at once; construct the right args.
    comp_args = ['bigtop-tests:smoke-tests:%s:test' % c
                 for c in smoke_components]
    gradlew_args = ['-Psmoke.tests', '--info'] + proxy_args + comp_args

    hookenv.log('Bigtop smoke test environment: {}'.format(subprocess_env))
    hookenv.log('Bigtop smoke test args: {}'.format(gradlew_args))
    with chdir(self.bigtop_base):
        try:
            utils.run_as('ubuntu', './gradlew', *gradlew_args,
                         env=subprocess_env)
            smoke_out = 'success'
        except subprocess.CalledProcessError as e:
            smoke_out = e.output
    return smoke_out
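# A minimal usage sketch, assuming `bigtop` is an instance of the class
# above; the component names and extra env var are hypothetical:
result = bigtop.run_smoke_tests(
    smoke_components=['hdfs', 'yarn'],
    smoke_env={'HADOOP_CONF_DIR': '/etc/hadoop/conf'},
)
# `result` is 'success' on a clean run, the captured gradle output on
# failure, or None if Bigtop was not ready.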
def install_burrow():
    # Install dep: https://github.com/golang/dep
    url = "https://raw.githubusercontent.com/golang/dep/master/install.sh"
    wget.download(url=url, out='/home/ubuntu/dep-installer.sh')
    os.chmod('/home/ubuntu/dep-installer.sh', 0o755)
    try:
        # output = run('/home/ubuntu/dep-installer.sh')
        # output.check_returncode()
        utils.run_as('root', '/home/ubuntu/dep-installer.sh')
    except CalledProcessError as e:
        log(e)
        status_set('blocked', 'Failed to install dep.')
        return

    previous_wd = os.getcwd()
    go_env = go_environment()
    utils.run_as('ubuntu', 'go', 'get', 'github.com/linkedin/Burrow')
    os.chdir(go_env['GOPATH'] + '/src/github.com/linkedin/Burrow')
    utils.run_as('ubuntu', 'dep', 'ensure')
    utils.run_as('ubuntu', 'go', 'install')

    dirs = [
        '/home/ubuntu/burrow',
        '/home/ubuntu/burrow/log',
        '/home/ubuntu/burrow/config',
    ]
    for directory in dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)
    os.chdir(previous_wd)
    set_flag('burrow.installed')
def start(self): """ Override start to use InsightEdge's wrapper. """ # Start if we're not already running. We currently dont have any # runtime config options, so no need to restart when hooks fire. if not utils.jps("zeppelin"): ie_home = self.dist_config.path('insightedge') zeppelin_home = self.dist_config.path('zeppelin') # chdir here because things like zepp tutorial think ZEPPELIN_HOME # is wherever the daemon was started from. with host.chdir(zeppelin_home): utils.run_as('ubuntu', '{}/sbin/start-zeppelin.sh'.format(ie_home)) # wait up to 30s for API to start, lest requests fail self.wait_for_api(30)
def install(self):
    '''
    Perform initial one-time setup, workaround upstream bugs, and
    trigger puppet.
    '''
    # Dirs are handled by the bigtop deb, so no need to call out to
    # dist_config to do that work. However, we want to adjust the
    # groups for the `ubuntu` user for better interaction with Juju.
    self.dist_config.add_users()

    # Set ports based on layer.yaml options
    self._add_override('zeppelin::server::server_port',
                       self.dist_config.port('zeppelin'))
    self._add_override('zeppelin::server::web_socket_port',
                       self.dist_config.port('zeppelin_web'))

    # Default spark to local mode on initial install. This will be
    # reconfigured if/when hadoop or spark relations are made.
    self._add_override('zeppelin::server::spark_master_url', 'local[*]')

    ##########
    # BUG: BIGTOP-2742
    # The default zeppelin init script looks for the literal '$(hostname)'
    # string. Symlink it so it exists before the apt install from puppet
    # tries to start the service.
    import subprocess
    host = subprocess.check_output(['hostname']).decode('utf8').strip()
    zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
    utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
    utils.run_as('root', 'ln', '-sf', zepp_pid,
                 '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
    ##########

    self.trigger_bigtop()

    ##########
    # BUG: BIGTOP-2742
    # Puppet apply will call systemctl daemon-reload, which removes the
    # symlink we just created. Now that the bits are on disk, update the
    # init script $(hostname) that caused this mess to begin with.
    zepp_init_script = '/etc/init.d/zeppelin'
    utils.re_edit_in_place(zepp_init_script, {
        r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
    })
    utils.run_as('root', 'systemctl', 'daemon-reload')
    self.restart()
    self.wait_for_api(30)
    ##########

    ##########
    # BUG: BIGTOP-2154
    # The zep deb depends on spark-core and spark-python. However, because
    # of the unholy requirement to have hive tightly coupled to spark,
    # we need to ensure spark-datanucleus is installed. Do this after the
    # initial install so the bigtop repo is available to us.
    utils.run_as('root', 'apt-get', 'install', '-qy', 'spark-datanucleus')
def update_zoo_cfg(zkid=getid(local_unit()), ip=unit_private_ip(),
                   remove=False):
    '''
    Configuration for a Zookeeper quorum requires listing all unique
    servers (server.X=<ip>:2888:3888) in the zoo.cfg. This function
    updates server.X entries and restarts the zookeeper service.
    '''
    zookeeper_cfg = "{}/zoo.cfg".format(
        os.environ.get('ZOOCFGDIR', '/etc/zookeeper/conf'))
    key = "server.{}".format(zkid)
    value = "={}:2888:3888".format(ip)
    if remove:
        removeKV(zookeeper_cfg, key)
        return
    addKV(zookeeper_cfg, key, value)

    # restart the zk server after altering zoo.cfg
    zookeeper_bin = os.environ.get('ZOO_BIN_DIR', '/usr/lib/zookeeper/bin')
    utils.run_as('zookeeper', '{}/zkServer.sh'.format(zookeeper_bin),
                 'restart')
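# Usage sketch with hypothetical values: register server.3 for a new unit,
# then drop the entry when the unit departs (no restart on removal).
update_zoo_cfg(zkid=3, ip='10.0.0.7')               # adds server.3=10.0.0.7:2888:3888
update_zoo_cfg(zkid=3, ip='10.0.0.7', remove=True)  # deletes the server.3 entry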
def install(self, force=False):
    if not force and self.is_installed():
        return
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    jujuresources.install(self.resources['spark'],
                          destination=self.dist_config.path('spark'),
                          skip_top_level=True)
    self.setup_spark_config()
    self.install_demo()

    # create hdfs storage space
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/directory')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/directory')

    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)
def stop(self): """ Override start to use InsightEdge's wrapper. """ # Start if we're not already running. We currently dont have any # runtime config options, so no need to restart when hooks fire. if not utils.jps("zeppelin"): ie_home = self.dist_config.path('insightedge') zeppelin_home = self.dist_config.path('zeppelin') # chdir here because things like zepp tutorial think ZEPPELIN_HOME # is wherever the daemon was started from. with host.chdir(zeppelin_home): utils.run_as('ubuntu', '{}/sbin/stop-zeppelin.sh'.format(ie_home)) # wait for the process to stop, since issuing a start while the # process is still running (i.e., restart) could cause it to not # start up again self.wait_for_stop(30)
def install(self):
    self.dist_config.add_dirs()

    # Copy our start/stop scripts (preserving attrs) to $HOME
    start_source = 'scripts/start_notebook.sh'
    Path(start_source).chmod(0o755)
    Path(start_source).chown('ubuntu', 'hadoop')

    stop_source = 'scripts/stop_notebook.sh'
    Path(stop_source).chmod(0o755)
    Path(stop_source).chown('ubuntu', 'hadoop')

    target = os.environ.get('HOME', '/home/ubuntu')
    Path(start_source).copy2(target)
    Path(stop_source).copy2(target)

    # Create an IPython profile
    utils.run_as("ubuntu", 'ipython', 'profile', 'create', 'pyspark')
def trigger_puppet(self): """ Trigger Puppet to install the desired components. """ java_version = unitdata.kv().get('java_version', '') if java_version.startswith('1.7.') and len(get_fqdn()) > 64: # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on # java to do a hostname lookup on clouds that have >64 char FQDNs # (e.g., gce). Attempt to work around this by putting the (hopefully # short) hostname into /etc/hosts so that it will (hopefully) be # used instead (see http://paste.ubuntu.com/16230171/). # NB: do this before the puppet apply, which may call java stuffs # like format namenode, which will fail if we dont get this fix # down early. short_host = subprocess.check_output(['facter', 'hostname']).strip().decode() private_ip = utils.resolve_private_address(hookenv.unit_private_ip()) if short_host and private_ip: utils.update_kv_host(private_ip, short_host) utils.manage_etc_hosts() # puppet args are bigtop-version depedent if self.bigtop_version == '1.1.0': puppet_args = [ '-d', '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"', 'bigtop-deploy/puppet/manifests/site.pp' ] else: puppet_args = [ '-d', '--parser=future', '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"', 'bigtop-deploy/puppet/manifests' ] # puppet apply runs from the root of the bigtop release source with chdir(self.bigtop_base): utils.run_as('root', 'puppet', 'apply', *puppet_args) # Do any post-puppet config on the generated config files. utils.re_edit_in_place('/etc/default/bigtop-utils', { r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format( java_home()), })
def install(self, force=False):
    if not force and self.is_installed():
        return

    # download Bigtop release; unpack the recipes
    bigtop_dir = '/home/ubuntu/bigtop.release'
    if not unitdata.kv().get('bigtop-release.installed', False):
        Path(bigtop_dir).rmtree_p()
        au = ArchiveUrlFetchHandler()
        au.install(bigtop_dir, '/home/ubuntu')
        unitdata.kv().set('bigtop-release.installed', True)
        unitdata.kv().flush(True)

    hiera_dst = self.dist_config.bigtop_hiera('path')
    hiera_conf = self.dist_config.bigtop_hiera('source')
    utils.re_edit_in_place(hiera_conf, {
        r'.*:datadir.*': '{0}/{1}'.format(hiera_dst, hiera_conf),
    })

    # generate site.yaml. Something like this would do
    setup_bigtop_config(
        bigtop_dir,
        "{0}/hieradata/site.yaml".format(os.path.dirname(hiera_conf)))

    # install required puppet modules
    try:
        utils.run_as('root', 'puppet', 'module', 'install',
                     'puppetlabs-stdlib', 'puppetlabs-apt')
    except CalledProcessError:
        pass  # All modules are set

    try:
        utils.run_as(
            'root', 'puppet', 'apply', '-d',
            '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
            'bigtop-deploy/puppet/manifests/site.pp')
    except CalledProcessError:
        pass  # Everything seems to be fine

    unitdata.kv().set('bigtop.installed', True)
    unitdata.kv().flush(True)
def configure_flume(self):
    config = hookenv.config()
    templating.render(
        source='flume.conf.j2',
        target=self.dist_config.path('flume_conf') / 'flume.conf',
        context={'dist_config': self.dist_config, 'config': config})

    flume_bin = self.dist_config.path('flume') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if flume_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], flume_bin])
        env['FLUME_CONF_DIR'] = self.dist_config.path('flume_conf')
        env['FLUME_CLASSPATH'] = self.dist_config.path('flume') / 'lib'
        env['FLUME_HOME'] = self.dist_config.path('flume')

    # flume_env = self.dist_config.path('flume_conf') / 'flume-env.sh'
    # utils.re_edit_in_place(flume_env, {
    # })

    utils.run_as('flume', 'hdfs', 'dfs', '-mkdir', '-p', '/user/flume')
def run(self, user, command, *args, **kwargs):
    """
    Run a Hadoop command as the given user.

    :param str user: User to run the command as
    :param str command: Command to run, prefixed with `bin/` or `sbin/`
    :param list args: Additional args to pass to the command
    """
    return utils.run_as(user,
                        self.dist_config.path('hadoop') / command,
                        *args, **kwargs)
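# A minimal usage sketch, assuming `hadoop` is an instance of the class
# above: report HDFS status as the hdfs user.
hadoop.run('hdfs', 'bin/hdfs', 'dfsadmin', '-report')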
def setup_init_scripts(self):
    templates_list = ['history', 'master', 'slave']
    for template in templates_list:
        if host.init_is_systemd():
            template_path = '/etc/systemd/system/spark-{}.service'.format(
                template)
        else:
            template_path = '/etc/init/spark-{}.conf'.format(template)
        if os.path.exists(template_path):
            os.remove(template_path)

    self.stop()

    mode = hookenv.config()['spark_execution_mode']
    templates_list = ['history']
    if mode == 'standalone':
        templates_list.append('master')
        templates_list.append('slave')

    for template in templates_list:
        template_name = '{}-upstart.conf'.format(template)
        template_path = '/etc/init/spark-{}.conf'.format(template)
        if host.init_is_systemd():
            template_name = '{}-systemd.conf'.format(template)
            template_path = '/etc/systemd/system/spark-{}.service'.format(
                template)
        render(
            template_name,
            template_path,
            context={
                'spark_bin': self.dist_config.path('spark'),
                'master': self.get_master()
            },
        )
        if host.init_is_systemd():
            utils.run_as('root', 'systemctl', 'enable',
                         'spark-{}.service'.format(template))

    if host.init_is_systemd():
        utils.run_as('root', 'systemctl', 'daemon-reload')
def configure_events_dir(self, mode):
    """
    Create directory for spark event data.

    This directory is used by workers to store event data. It is also
    read by the history server when displaying event information.

    :param string mode: Spark execution mode to determine the dir location.
    """
    dc = self.dist_config

    # Directory needs to be 777 so non-spark users can write job history
    # there. It needs to be g+s (HDFS is g+s by default) so all entries
    # are readable by spark (in the spark group). It needs to be +t so
    # users cannot remove files they don't own.
    if mode.startswith('yarn'):
        events_dir = 'hdfs://{}'.format(dc.path('spark_events'))
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chmod', '1777', events_dir)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:spark',
                     events_dir)
    else:
        events_dir = dc.path('spark_events')
        events_dir.makedirs_p()
        events_dir.chmod(0o3777)
        host.chownr(events_dir, 'ubuntu', 'spark', chowntopdir=True)
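# Decoding the local-mode 0o3777 above, a sketch using only the stdlib:
import stat
mode_bits = 0o3777
assert mode_bits & stat.S_ISGID  # g+s: entries inherit the 'spark' group
assert mode_bits & stat.S_ISVTX  # +t: users cannot remove files they don't own
assert mode_bits & 0o777 == 0o777  # world-writable for job history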
def get_fqdn(): """ Return the FQDN as known by 'facter'. This must be run with utils.run_as to ensure any /etc/environment changes are honored. We may have overriden 'facter fqdn' with a 'FACTER_fqdn' environment variable (see Bigtop.check_localdomain). :returns: Sensible hostname (true FQDN or FACTER_fqdn from /etc/environment) """ hostname = utils.run_as('ubuntu', 'facter', 'fqdn', capture_output=True) return hostname.strip()
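# Usage sketch: guard against the java7 hostname limit noted in
# trigger_puppet above (the 64-char threshold comes from that comment).
if len(get_fqdn()) > 64:
    hookenv.log('FQDN exceeds java7 MAXHOSTNAMELEN; applying workaround')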
def check_hdfs_setup(self):
    """
    Check if the initial setup has been done in HDFS.

    This currently consists of initializing the /user/ubuntu directory.
    """
    try:
        output = utils.run_as('hdfs',
                              'hdfs', 'dfs', '-stat', '%u', '/user/ubuntu',
                              capture_output=True)
        return output.strip() == 'ubuntu'
    except subprocess.CalledProcessError:
        return False
def install(self):
    '''
    Perform initial one-time setup, workaround upstream bugs, and
    trigger puppet.
    '''
    # Dirs are handled by the bigtop deb, so no need to call out to
    # dist_config to do that work. However, we want to adjust the
    # groups for the `ubuntu` user for better interaction with Juju.
    self.dist_config.add_users()

    # Set ports based on layer.yaml options
    self._add_override('zeppelin::server::server_port',
                       self.dist_config.port('zeppelin'))
    self._add_override('zeppelin::server::web_socket_port',
                       self.dist_config.port('zeppelin_websocket'))

    # Default spark to local mode on initial install. This will be
    # reconfigured if/when hadoop or spark relations are made.
    local_master = 'local[*]'
    self._add_override('zeppelin::server::spark_master_url', local_master)

    # The spark-client role expects hdfs by default. Since we want to
    # keep Hadoop optional, ensure we remove hadoopy bits from our
    # local spark config. This has no effect if/when a remote spark joins,
    # and since there is no spark history server running, the event dirs
    # are not important -- they just need not be 'hdfs:///blah'.
    events_log_dir = 'file:///tmp'
    self._add_override('spark::common::master_url', local_master)
    self._add_override('spark::common::event_log_dir', events_log_dir)
    self._add_override('spark::common::history_log_dir', events_log_dir)

    ##########
    # BUG: BIGTOP-2742
    # The default zeppelin init script looks for the literal '$(hostname)'
    # string. Symlink it so it exists before the apt install from puppet
    # tries to start the service.
    import subprocess
    host = subprocess.check_output(['hostname']).decode('utf8').strip()
    zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
    utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
    utils.run_as('root', 'ln', '-sf', zepp_pid,
                 '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
    ##########

    self.trigger_bigtop()

    ##########
    # BUG: BIGTOP-2742
    # Puppet apply will call systemctl daemon-reload, which removes the
    # symlink we just created. Now that the bits are on disk, update the
    # init script $(hostname) that caused this mess to begin with.
    zepp_init_script = '/etc/init.d/zeppelin'
    utils.re_edit_in_place(zepp_init_script, {
        r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
    })
    utils.run_as('root', 'systemctl', 'daemon-reload')
    self.restart()
    self.wait_for_api(30)
def start(self):
    flink_home = self.dist_config.path('flink')
    self.stop()
    containers = hookenv.config()['containers']
    containermem = hookenv.config()['containermem']
    # NB: config values may be ints; pass them as strings to the command
    start_flink = utils.run_as('flink',
                               '{}/bin/yarn-session.sh'.format(flink_home),
                               'start',
                               '-n', str(containers),
                               '-tm', str(containermem),
                               '-d',
                               capture_output=True)
    for line in start_flink.splitlines():
        print(line)
        if 'kill' in line:
            flink_appID = line.split(" ")[3]
            unitdata.kv().set('flink.ID', flink_appID)
def setup_init_script(self, user, servicename):
    daemon = "yarn"
    if user == "hdfs":
        daemon = "hadoop"
    elif user == "mapred":
        daemon = "mr-jobhistory"

    template_name = 'templates/upstart.conf'
    target_template_path = '/etc/init/{}.conf'.format(servicename)
    if host.init_is_systemd():
        template_name = 'templates/systemd.conf'
        target_template_path = '/etc/systemd/system/{}.service'.format(
            servicename)

    d = os.path.dirname(sys.modules['jujubigdata'].__file__)
    source_template_path = os.path.join(d, template_name)

    if os.path.exists(target_template_path):
        os.remove(target_template_path)

    render(
        source_template_path,
        target_template_path,
        templates_dir="/",
        context={
            'service': servicename,
            'user': user,
            'hadoop_path': self.dist_config.path('hadoop'),
            'hadoop_conf': self.dist_config.path('hadoop_conf'),
            'daemon': daemon,
        },
    )
    if host.init_is_systemd():
        utils.run_as('root', 'systemctl', 'enable',
                     '{}.service'.format(servicename))
        utils.run_as('root', 'systemctl', 'daemon-reload')
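# A minimal usage sketch, assuming `mgr` is an instance of the class above;
# the service name is hypothetical: render and enable an init job for the
# HDFS namenode running as the hdfs user.
mgr.setup_init_script('hdfs', 'hdfs-namenode')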
def stop(self):
    spark_home = self.dist_config.path('spark')
    # Only stop services if they're running
    if utils.jps("HistoryServer"):
        utils.run_as('ubuntu',
                     '{}/sbin/stop-history-server.sh'.format(spark_home))
    if utils.jps("Master"):
        utils.run_as('ubuntu', '{}/sbin/stop-master.sh'.format(spark_home))
    if utils.jps("Worker"):
        utils.run_as('ubuntu', '{}/sbin/stop-slave.sh'.format(spark_home))
def trigger_bigtop(self):
    '''
    Trigger the Bigtop puppet recipe that handles the Zeppelin service.
    '''
    bigtop = Bigtop()
    overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                       strip=True)

    # The zep deb depends on spark-core which unfortunately brings in
    # most of hadoop. Include appropriate roles here to ensure these
    # packages are configured in the same way as our other Bigtop
    # software deployed with puppet.
    bigtop.render_site_yaml(
        roles=[
            'spark-client',
            'spark-yarn-slave',
            'zeppelin-server',
        ],
        overrides=overrides,
    )

    # NB: during an upgrade, we configure the site.yaml, but do not
    # trigger puppet. The user must do that with the 'reinstall' action.
    if unitdata.kv().get('zeppelin.version.repo', False):
        hookenv.log("An upgrade is available and the site.yaml has been "
                    "configured. Run the 'reinstall' action to continue.",
                    level=hookenv.INFO)
    else:
        ####################################################################
        # BUG: BIGTOP-2742
        # The default zeppelin init script looks for the literal
        # '$(hostname)' string. Symlink it so it exists before the apt
        # install from puppet tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf', zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ####################################################################

        bigtop.trigger_puppet()
        self.wait_for_api(30)

        ####################################################################
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update
        # the init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)