def zookeeper_changed(zookeeper):
    hookenv.log('Checking if Zookeeper has changed')
    zookeeper_servers_string = ''
    filesdir = '{}/files'.format(hookenv.charm_dir())
    for zk_unit in zookeeper.zookeepers():
        zookeeper_servers_string += '{}:{},'.format(zk_unit['host'],
                                                    zk_unit['port'])
    if zookeeper_servers_string[:-1] not in open(
            '{}/nifi-1.3.0/conf/nifi.properties'.format(filesdir)).read():
        hookenv.status_set(
            'maintenance',
            'Zookeeper has changed. Updating Apache NiFi settings and restarting')
        re_edit_in_place(
            '{}/nifi-1.3.0/conf/nifi.properties'.format(filesdir), {
                r'.*nifi.zookeeper.connect.string.*':
                'nifi.zookeeper.connect.string={}'.format(
                    zookeeper_servers_string[:-1])
            })
        try:
            subprocess.check_call([
                'bash', '{}/nifi-1.3.0/bin/nifi.sh'.format(filesdir),
                'restart'
            ])
            hookenv.status_set('active', 'Running: cluster mode with Zookeeper')
            set_state('apache-nifi.cluster')
        except subprocess.CalledProcessError:
            hookenv.status_set('blocked', 'Failed to restart')

def update_apps(self):
    # Add all services disabled unless we have a joined relation
    # as marked by the respective state
    # Enabled by default: 'filebrowser', 'jobbrowser'
    disabled_services = [
        'beeswax', 'impala', 'security',
        'rdbms', 'jobsub', 'pig',
        'hbase', 'sqoop', 'zookeeper',
        'metastore', 'spark', 'oozie', 'indexer', 'search']

    for key in get_states():
        if "joined" in key:
            relname = key.split('.')[0]
            if 'hive' in relname:
                disabled_services.remove('beeswax')
                disabled_services.remove('metastore')
            if 'spark' in relname:
                disabled_services.remove('spark')
            if 'oozie' in relname:
                disabled_services.remove('oozie')
            if 'zookeeper' in relname:
                disabled_services.remove('zookeeper')

    hue_config = ''.join((self.dist_config.path('hue'),
                          '/desktop/conf/hue.ini'))
    services_string = ','.join(disabled_services)
    hookenv.log("Disabled apps {}".format(services_string))
    utils.re_edit_in_place(hue_config, {
        r'.*app_blacklist=.*': ''.join(('app_blacklist=', services_string))
    })
    self.check_relations()

def configure_spark(self, hostname, port):
    # hookenv.log("configuring spark connection via livy")
    hue_config = ''.join((self.dist_config.path('hue'),
                          '/desktop/conf/hue.ini'))
    utils.re_edit_in_place(hue_config, {
        r'.*livy_server_host *=.*': 'livy_server_host=%s' % hostname,
        r'.*livy_server_port *=.*': 'livy_server_port=%s' % port
    })

def setup_spark_config(self):
    '''
    copy the default configuration files to spark_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('spark') / 'conf'
    spark_conf = self.dist_config.path('spark_conf')
    spark_conf.rmtree_p()
    default_conf.copytree(spark_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    default_conf.rmtree_p()
    spark_conf.symlink(default_conf)

    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    if not spark_env.exists():
        (self.dist_config.path('spark_conf') /
         'spark-env.sh.template').copy(spark_env)
    spark_default = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    if not spark_default.exists():
        (self.dist_config.path('spark_conf') /
         'spark-defaults.conf.template').copy(spark_default)
    spark_log4j = self.dist_config.path('spark_conf') / 'log4j.properties'
    if not spark_log4j.exists():
        (self.dist_config.path('spark_conf') /
         'log4j.properties.template').copy(spark_log4j)
    utils.re_edit_in_place(spark_log4j, {
        r'log4j.rootCategory=INFO, console':
        'log4j.rootCategory=ERROR, console',
    })

def configure_kafka(self):
    # Get ip:port data from our connected zookeepers
    if Zookeeper().connected_units() and Zookeeper().is_ready():
        zks = []
        for unit, data in Zookeeper().filtered_data().items():
            ip = utils.resolve_private_address(data['private-address'])
            zks.append("%s:%s" % (ip, data['port']))
        zks.sort()
        zk_connect = ",".join(zks)

        # update consumer props
        cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

        # update server props
        cfg = self.dist_config.path('kafka_conf') / 'server.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })
    else:
        # if we have no zookeepers, make sure kafka is stopped
        self.stop()

def configure_remote_db(self, mysql):
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    jdbc_url = \
        "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
            mysql.host(), mysql.port(), mysql.database()
        )
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = jdbc_url
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = \
            "com.mysql.jdbc.Driver"

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*':
        ('export HIVE_AUX_JARS_PATH='
         '/usr/share/java/mysql-connector-java.jar'),
    })

    # Now that we have db connection info, init our schema (only once)
    remote_db = hookenv.remote_service_name()
    if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
        tool_path = "{}/bin/schematool".format(self.dist_config.path('hive'))
        utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
        unitdata.kv().flush(True)

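The Hive functions above rely on utils.xmlpropmap_edit_in_place, which exposes a Hadoop-style XML property file as a mutable mapping. The charm-helpers implementation is not part of this listing; what follows is a minimal sketch of the same contract using only the standard library, assuming the conventional <configuration><property><name>/<value> layout. The details are illustrative, not the actual helper.

import xml.etree.ElementTree as ET
from contextlib import contextmanager


@contextmanager
def xmlpropmap_edit_in_place(path):
    # Sketch (assumption): read <property><name>/<value> pairs into a
    # plain dict, yield it for editing, then rewrite the file on exit.
    tree = ET.parse(path)
    root = tree.getroot()
    props = {p.findtext('name'): p.findtext('value')
             for p in root.findall('property')}

    yield props

    # Rebuild the <configuration> body from the (possibly mutated) dict.
    for p in list(root.findall('property')):
        root.remove(p)
    for name, value in props.items():
        prop = ET.SubElement(root, 'property')
        ET.SubElement(prop, 'name').text = name
        ET.SubElement(prop, 'value').text = str(value)
    tree.write(path)
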
def configure(self):
    '''
    Configure spark environment for all users
    '''
    spark_home = self.dist_config.path('spark')
    spark_bin = spark_home / 'bin'

    # put our jar in hdfs
    spark_assembly_jar = glob(
        '{}/lib/spark-assembly-*.jar'.format(spark_home))[0]
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/share/lib')
    try:
        utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                     '/user/ubuntu/share/lib/spark-assembly.jar')
    except CalledProcessError:
        print("File exists")

    # update environment variables
    with utils.environment_edit_in_place('/etc/environment') as env:
        if spark_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], spark_bin])
        env['MASTER'] = hookenv.config('spark_execution_mode')
        env['PYSPARK_DRIVER_PYTHON'] = "ipython"
        env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
        env['SPARK_HOME'] = spark_home
        env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

    # update spark config
    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled true',
        r'.*spark.eventLog.dir *.*':
        'spark.eventLog.dir hdfs:///user/ubuntu/directory',
    })

def setup_gobblin(self, host, port):
    '''
    Configure Gobblin. Each time something changes (e.g. a new Hadoop
    endpoint becomes available) this method must be called.

    :param str host: IP of the HDFS endpoint.
    :param str port: Port of the HDFS endpoint.
    '''
    # Setup the environment
    gobblin_bin = self.dist_config.path('gobblin') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if gobblin_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], gobblin_bin])
        env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
        env['GOBBLIN_WORK_DIR'] = "/user/gobblin/work"

    hdfs_endpoint = ''.join([host, ':', port])

    # Setup gobblin configuration
    conf_dir = self.dist_config.path('gobblin') / 'conf'
    gobblin_config_template = conf_dir / 'gobblin-mapreduce.properties.template'
    gobblin_config = conf_dir / 'gobblin-mapreduce.properties'
    try:
        copy(gobblin_config_template, gobblin_config)
    except FileNotFoundError:
        pass
    utils.re_edit_in_place(gobblin_config, {
        r'fs.uri=hdfs://localhost:8020': 'fs.uri=hdfs://%s' % hdfs_endpoint,
    })
    if '2.7.2' in self.hadoop_version:
        utils.re_edit_in_place(gobblin_config, {
            r'task.data.root.dir=*':
            'task.data.root.dir=${env:GOBBLIN_WORK_DIR}/task'
        }, append_non_matches=True)

def zookeeper_config(zookeeper):
    hookenv.status_set('maintenance',
                       'Changing Apache NiFi to run as a cluster')
    hookenv.log(
        'Adding Apache Zookeeper -- Changing Apache NiFi to run as a cluster')
    conf = hookenv.config()
    zookeeper_servers_string = ''
    for zk_unit in zookeeper.zookeepers():
        zookeeper_servers_string += '{}:{},'.format(zk_unit['host'],
                                                    zk_unit['port'])
    re_edit_in_place(
        '%s/files/nifi-1.3.0/conf/nifi.properties' % hookenv.charm_dir(), {
            r'.*nifi.cluster.is.node.*': 'nifi.cluster.is.node=true',
            r'.*nifi.cluster.node.address.*':
            'nifi.cluster.node.address={}'.format(hookenv.unit_private_ip()),
            r'.*nifi.web.http.port.*':
            'nifi.web.http.port={}'.format(conf['nifi-port']),
            r'.*nifi.cluster.node.protocol.port.*':
            'nifi.cluster.node.protocol.port={}'.format(conf['cluster-port']),
            r'.*nifi.zookeeper.connect.string.*':
            'nifi.zookeeper.connect.string={}'.format(zookeeper_servers_string)
        })
    hookenv.open_port(conf['cluster-port'])
    filesdir = '{}/files'.format(hookenv.charm_dir())
    try:
        subprocess.check_call(
            ['bash', '{}/nifi-1.3.0/bin/nifi.sh'.format(filesdir), 'restart'])
        hookenv.status_set('active', 'Running: cluster mode with Zookeeper')
        set_state('apache-nifi.cluster')
    except subprocess.CalledProcessError:
        hookenv.status_set('blocked', 'Failed to restart')

def update_apps(self):
    # Add all services disabled unless we have a joined relation
    # as marked by the respective state
    # Enabled by default: 'filebrowser', 'jobbrowser'
    disabled_services = [
        'beeswax', 'impala', 'security',
        'rdbms', 'jobsub', 'pig',
        'hbase', 'sqoop', 'zookeeper',
        'metastore', 'spark', 'oozie', 'indexer', 'search'
    ]

    for k, v in get_states().items():
        if "joined" in k:
            relname = k.split('.')[0]
            if 'hive' in relname:
                disabled_services.remove('beeswax')
                disabled_services.remove('metastore')
            if 'spark' in relname:
                disabled_services.remove('spark')
            if 'oozie' in relname:
                disabled_services.remove('oozie')
            if 'zookeeper' in relname:
                disabled_services.remove('zookeeper')

    hue_config = ''.join((self.dist_config.path('hue'),
                          '/desktop/conf/hue.ini'))
    services_string = ','.join(disabled_services)
    hookenv.log("Disabled apps {}".format(services_string))
    utils.re_edit_in_place(hue_config, {
        r'.*app_blacklist=.*': ''.join(('app_blacklist=', services_string))
    })
    self.check_relations()

def setup_gobblin(self, host, port):
    '''
    Configure Gobblin. Each time something changes (e.g. a new Hadoop
    endpoint becomes available) this method must be called.

    :param str host: IP of the HDFS endpoint.
    :param str port: Port of the HDFS endpoint.
    '''
    # Setup the environment
    gobblin_bin = self.dist_config.path('gobblin') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if gobblin_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], gobblin_bin])
        env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
        env['GOBBLIN_WORK_DIR'] = "/user/gobblin/work"

    hdfs_endpoint = ''.join([host, ':', port])

    # Setup gobblin configuration
    conf_dir = self.dist_config.path('gobblin') / 'conf'
    gobblin_config_template = conf_dir / 'gobblin-mapreduce.properties.template'
    gobblin_config = conf_dir / 'gobblin-mapreduce.properties'
    copy(gobblin_config_template, gobblin_config)

    utils.re_edit_in_place(gobblin_config, {
        r'fs.uri=hdfs://localhost:8020': 'fs.uri=hdfs://%s' % hdfs_endpoint,
    })

def configure_kafka(self, zk_units):
    # Get ip:port data from our connected zookeepers
    if not zk_units:
        # if we have no zookeepers, make sure kafka is stopped
        self.stop()
    else:
        zks = []
        for remote_address, port in zk_units:
            ip = utils.resolve_private_address(remote_address)
            zks.append("%s:%s" % (ip, port))
        zks.sort()
        zk_connect = ",".join(zks)

        # update consumer props
        cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

        # update server props
        cfg = self.dist_config.path('kafka_conf') / 'server.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

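utils.resolve_private_address shows up whenever zookeeper or kafka addresses are stitched together. Its implementation is not included in this listing; judging by how it is used, it plausibly normalizes a Juju private-address value (which may arrive as either a hostname or an IP) into an IP. A hedged sketch of that idea, names and behaviour assumed:

import socket


def resolve_private_address(addr):
    # Sketch (assumption): if addr already parses as an IPv4 address,
    # return it unchanged; otherwise resolve the hostname via DNS.
    try:
        socket.inet_aton(addr)
        return addr
    except OSError:
        return socket.gethostbyname(addr)
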
def is_localdomain():
    """
    Determine if our domainname is 'localdomain'.

    This method is useful for determining if a machine's domainname is
    'localdomain' so we can configure applications accordingly.

    :return: True if domainname is 'localdomain'; False otherwise
    """
    # NB: lxd has a pesky bug where it makes all containers think they
    # are .localdomain when they are really .lxd:
    #   https://bugs.launchpad.net/juju/+bug/1633126
    # The .lxd domain is completely valid for lxc FQDNs, so if we are
    # in this scenario, update nsswitch.conf to prefer the accurate lxd dns
    # over /etc/hosts. All subsequent domainname tests by facter or any
    # other application will correctly report .lxd as the domainname.
    lxd_check = subprocess.check_output(['hostname', '-A']).strip().decode()
    if lxd_check.endswith('.lxd'):
        utils.re_edit_in_place('/etc/nsswitch.conf', {
            r'files dns': 'dns files'
        })

    domainname = subprocess.check_output(['facter', 'domain']).strip().decode()
    if domainname == 'localdomain':
        return True
    else:
        return False

def configure_hadoop(self):
    java_home = Path(unitdata.kv().get('java.home'))
    java_bin = java_home / 'bin'
    hadoop_bin = self.dist_config.path('hadoop') / 'bin'
    hadoop_sbin = self.dist_config.path('hadoop') / 'sbin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['JAVA_HOME'] = java_home
        if java_bin not in env['PATH']:
            # ensure that correct java is used
            env['PATH'] = ':'.join([java_bin, env['PATH']])
        if hadoop_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hadoop_bin])
        if hadoop_sbin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hadoop_sbin])
        env['HADOOP_LIBEXEC_DIR'] = self.dist_config.path('hadoop') / 'libexec'
        env['HADOOP_INSTALL'] = self.dist_config.path('hadoop')
        env['HADOOP_HOME'] = self.dist_config.path('hadoop')
        env['HADOOP_COMMON_HOME'] = self.dist_config.path('hadoop')
        env['HADOOP_HDFS_HOME'] = self.dist_config.path('hadoop')
        env['HADOOP_MAPRED_HOME'] = self.dist_config.path('hadoop')
        env['HADOOP_YARN_HOME'] = self.dist_config.path('hadoop')
        env['YARN_HOME'] = self.dist_config.path('hadoop')
        env['HADOOP_CONF_DIR'] = self.dist_config.path('hadoop_conf')
        env['YARN_CONF_DIR'] = self.dist_config.path('hadoop_conf')
        env['YARN_LOG_DIR'] = self.dist_config.path('yarn_log_dir')
        env['HDFS_LOG_DIR'] = self.dist_config.path('hdfs_log_dir')
        # for hadoop 2.2.0 only
        env['HADOOP_LOG_DIR'] = self.dist_config.path('hdfs_log_dir')
        # should be moved to config, but could be destructive for
        # mapreduce operation
        env['MAPRED_LOG_DIR'] = '/var/log/hadoop/mapred'
        env['MAPRED_PID_DIR'] = '/var/run/hadoop/mapred'

    hadoop_env = self.dist_config.path('hadoop_conf') / 'hadoop-env.sh'
    utils.re_edit_in_place(hadoop_env, {
        r'export JAVA_HOME *=.*': 'export JAVA_HOME=%s' % java_home,
    })

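Nearly every function in this listing mutates /etc/environment through utils.environment_edit_in_place, a context manager that presents the file as a dict and writes changes back when the block exits. The real helper is not shown here; a rough sketch of the contract, assuming simple KEY=value lines and ignoring quoting and comment edge cases, could look like this:

from contextlib import contextmanager


@contextmanager
def environment_edit_in_place(path='/etc/environment'):
    # Sketch (assumption): parse KEY=value lines into a dict, yield it
    # for editing, then rewrite the whole file from the dict on exit.
    with open(path) as f:
        pairs = [line.strip().split('=', 1)
                 for line in f if '=' in line]
    env = {key: value for key, value in pairs}

    yield env

    with open(path, 'w') as f:
        for key, value in env.items():
            f.write('{}={}\n'.format(key, value))
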
def initial_config():
    utils.re_edit_in_place('/etc/neo4j/neo4j.conf', {
        r'^#?dbms.connectors.default_listen_address=([0-9].[0-9].[0-9].[0-9]|)$':
        'dbms.connectors.default_listen_address=0.0.0.0',
        r'^#?dbms.security.auth_enabled=false$':
        'dbms.security.auth_enabled=false'
    })

def disable_ha(self):
    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    utils.re_edit_in_place(spark_env, {
        r'.*SPARK_DAEMON_JAVA_OPTS.*': '# SPARK_DAEMON_JAVA_OPTS',
    })
    unitdata.kv().set('zookeepers.available', False)
    unitdata.kv().flush(True)

def configure_hive(self, mysql):
    config = hookenv.config()
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = \
            "jdbc:mysql://{}:{}/{}".format(
                mysql.host(), mysql.port(), mysql.database())
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = \
            "com.mysql.jdbc.Driver"
        props['hive.hwi.war.file'] = \
            "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HADOOP_HEAPSIZE *=.*':
        'export HADOOP_HEAPSIZE=%s' % config['heap'],
        r'.*export HIVE_AUX_JARS_PATH *=.*':
        'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
    })

    # Now that we have db connection info, init our schema (only once)
    if not unitdata.kv().get('hive.schema.initialized'):
        utils.run_as('hive', 'schematool', '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized', True)

def setup_zookeeper_config(self):
    """Setup Zookeeper configuration based on default config.

    Copy the default configuration files to zookeeper_conf property
    defined in dist.yaml
    """
    default_conf = self.dist_config.path('zookeeper') / 'conf'
    zookeeper_conf = self.dist_config.path('zookeeper_conf')
    zookeeper_conf.rmtree_p()
    default_conf.copytree(zookeeper_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    default_conf.rmtree_p()
    zookeeper_conf.symlink(default_conf)

    zoo_cfg = zookeeper_conf / 'zoo.cfg'
    if not zoo_cfg.exists():
        (zookeeper_conf / 'zoo_sample.cfg').copy(zoo_cfg)
    utils.re_edit_in_place(zoo_cfg, {
        r'^dataDir.*': 'dataDir={}'.format(
            self.dist_config.path('zookeeper_data_dir')),
    })

    # Configure zookeeper environment for all users
    zookeeper_bin = self.dist_config.path('zookeeper') / 'bin'
    zookeeper_rest = self.dist_config.path('zookeeper') / 'src/contrib/rest'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if zookeeper_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], zookeeper_bin])
        env['ZOOCFGDIR'] = self.dist_config.path('zookeeper_conf')
        env['ZOO_BIN_DIR'] = zookeeper_bin
        env['ZOO_LOG_DIR'] = self.dist_config.path('zookeeper_log_dir')
        env['ZOO_REST_DIR'] = zookeeper_rest

def configure_hive(self, hostname, port):
    hookenv.log("configuring hive connection")
    hue_config = ''.join((self.dist_config.path('hue'),
                          '/desktop/conf/hue.ini'))
    utils.re_edit_in_place(hue_config, {
        r'.*hive_server_host *=.*': 'hive_server_host=%s' % hostname,
        r'.*hive_server_port *=.*': 'hive_server_port=%s' % port
    })

def setup_zookeeper_config(self):
    """
    Setup Zookeeper configuration based on default config.

    Copy the default configuration files to zookeeper_conf property
    defined in dist.yaml
    """
    default_conf = self.dist_config.path('zookeeper') / 'conf'
    zookeeper_conf = self.dist_config.path('zookeeper_conf')
    zookeeper_conf.rmtree_p()
    default_conf.copytree(zookeeper_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    default_conf.rmtree_p()
    zookeeper_conf.symlink(default_conf)

    zoo_cfg = zookeeper_conf / 'zoo.cfg'
    if not zoo_cfg.exists():
        (zookeeper_conf / 'zoo_sample.cfg').copy(zoo_cfg)
    utils.re_edit_in_place(zoo_cfg, {
        r'^dataDir.*': 'dataDir={}'.format(
            self.dist_config.path('zookeeper_data_dir')),
    })

    # Configure zookeeper environment for all users
    zookeeper_bin = self.dist_config.path('zookeeper') / 'bin'
    zookeeper_rest = self.dist_config.path('zookeeper') / 'src/contrib/rest'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if zookeeper_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], zookeeper_bin])
        env['ZOOCFGDIR'] = self.dist_config.path('zookeeper_conf')
        env['ZOO_BIN_DIR'] = zookeeper_bin
        env['ZOO_LOG_DIR'] = self.dist_config.path('zookeeper_log_dir')
        env['ZOO_REST'] = zookeeper_rest

def setup_flume_config(self):
    '''
    copy the default configuration files to flume_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('flume') / 'conf'
    flume_conf = self.dist_config.path('flume_conf')
    flume_conf.rmtree_p()
    default_conf.copytree(flume_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    default_conf.rmtree_p()
    flume_conf.symlink(default_conf)

    flume_env = flume_conf / 'flume-env.sh'
    if not flume_env.exists():
        (flume_conf / 'flume-env.sh.template').copy(flume_env)

    flume_conf_src = flume_conf / 'flume-conf.properties.template'
    flume_conf_dst = flume_conf / 'flume.conf'
    if not flume_conf_dst.exists():
        flume_conf_src.copy(flume_conf_dst)

    flume_log4j = self.dist_config.path('flume_conf') / 'log4j.properties'
    flume_logs = self.dist_config.path('flume_logs')
    utils.re_edit_in_place(flume_log4j, {
        r'^flume.log.dir.*': 'flume.log.dir={}'.format(flume_logs),
    })

def setup_flume_config(self):
    '''
    copy the default configuration files to flume_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('flume') / 'conf'
    flume_conf = self.dist_config.path('flume_conf')
    flume_conf.rmtree_p()
    default_conf.copytree(flume_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    default_conf.rmtree_p()
    flume_conf.symlink(default_conf)

    flume_env = self.dist_config.path('flume_conf') / 'flume-env.sh'
    if not flume_env.exists():
        (self.dist_config.path('flume_conf') /
         'flume-env.sh.template').copy(flume_env)

    flume_conf = self.dist_config.path('flume_conf') / 'flume.conf'
    if not flume_conf.exists():
        (self.dist_config.path('flume_conf') /
         'flume-conf.properties.template').copy(flume_conf)

    flume_log4j = self.dist_config.path('flume_conf') / 'log4j.properties'
    utils.re_edit_in_place(flume_log4j, {
        r'^flume.log.dir.*':
        'flume.log.dir={}'.format(self.dist_config.path('flume_logs')),
    })

def install(self):
    '''
    Perform initial one-time setup, workaround upstream bugs, and
    trigger puppet.
    '''
    # Dirs are handled by the bigtop deb, so no need to call out to
    # dist_config to do that work. However, we want to adjust the
    # groups for the `ubuntu` user for better interaction with Juju.
    self.dist_config.add_users()

    # Set ports based on layer.yaml options
    self._add_override('zeppelin::server::server_port',
                       self.dist_config.port('zeppelin'))
    self._add_override('zeppelin::server::web_socket_port',
                       self.dist_config.port('zeppelin_websocket'))

    # Default spark to local mode on initial install. This will be
    # reconfigured if/when hadoop or spark relations are made.
    local_master = 'local[*]'
    self._add_override('zeppelin::server::spark_master_url', local_master)

    # The spark-client role expects hdfs by default. Since we want to
    # keep Hadoop optional, ensure we remove hadoopy bits from our
    # local spark config. This has no effect if/when a remote spark joins,
    # and since there is no spark history server running, the event dirs
    # are not important -- they just need not be 'hdfs:///blah'.
    events_log_dir = 'file:///tmp'
    self._add_override('spark::common::master_url', local_master)
    self._add_override('spark::common::event_log_dir', events_log_dir)
    self._add_override('spark::common::history_log_dir', events_log_dir)

    ##########
    # BUG: BIGTOP-2742
    # Default zeppelin init script looks for the literal '$(hostname)'
    # string. Symlink it so it exists before the apt install from puppet
    # tries to start the service.
    import subprocess
    host = subprocess.check_output(['hostname']).decode('utf8').strip()
    zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
    utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
    utils.run_as('root', 'ln', '-sf',
                 zepp_pid,
                 '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
    ##########

    self.trigger_bigtop()

    ##########
    # BUG: BIGTOP-2742
    # Puppet apply will call systemctl daemon-reload, which removes the
    # symlink we just created. Now that the bits are on disk, update the
    # init script $(hostname) that caused this mess to begin with.
    zepp_init_script = '/etc/init.d/zeppelin'
    utils.re_edit_in_place(zepp_init_script, {
        r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
    })
    utils.run_as('root', 'systemctl', 'daemon-reload')
    self.restart()
    self.wait_for_api(30)

def install_java():
    java_package = "openjdk-8-jdk-headless"
    fetch.apt_install(java_package)
    java_home_ = java_home()
    utils.re_edit_in_place('/etc/environment', {
        r'#? *JAVA_HOME *=.*': 'JAVA_HOME={}'.format(java_home_),
    }, append_non_matches=True)

def setup_hue(self, namenodes, resourcemanagers, hdfs_port, yarn_port,
              yarn_http, yarn_ipc):
    hookenv.status_set('maintenance', 'Setting up Hue')
    hue_bin = self.dist_config.path('hue') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if hue_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hue_bin])
        env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
        env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
        hadoop_conf = env['HADOOP_CONF_DIR'] + '/core-site.xml'
        yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
        mapred_conf = env['HADOOP_CONF_DIR'] + '/mapred-site.xml'

    with utils.xmlpropmap_edit_in_place(hadoop_conf) as props:
        hdfs_endpoint = props['fs.defaultFS']

    with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
        yarn_log_url = props['yarn.log.server.url']  # 19888
        yarn_resmgr = props['yarn.resourcemanager.address']  # 8032

    with utils.xmlpropmap_edit_in_place(mapred_conf) as props:
        mapred_jobhistory = props['mapreduce.jobhistory.address']  # 10020

    default_conf = self.dist_config.path('hue') / 'desktop/conf'
    hue_conf = self.dist_config.path('hue_conf')
    if os.path.islink('/usr/lib/hue/desktop/conf'):
        return
    else:
        hue_conf.rmtree_p()
        default_conf.copytree(hue_conf)
        # Now remove the conf included in the tarball and symlink our
        # real conf
        default_conf.rmtree_p()
        hue_conf.symlink(default_conf)

    hdfs_fulluri = hdfs_endpoint.split('/')[2]
    hdfs_hostname = hdfs_fulluri.split(':')[0]

    hue_config = ''.join((self.dist_config.path('hue'),
                          '/desktop/conf/hue.ini'))
    hue_port = self.dist_config.port('hue_web')

    # Fix following for HA:
    #   http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_hadoop-ha/content/ha-nn-deploy-hue.html
    hookenv.log("Not currently supporting HA, FIX: namenodes are: " +
                str(namenodes) + " resmanagers: " + str(resourcemanagers))

    utils.re_edit_in_place(hue_config, {
        r'http_port=8888': 'http_port=%s' % hue_port,
        # r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s' % hdfs_endpoint,
        r'fs_defaultfs=hdfs://localhost:8020':
        'fs_defaultfs=%s:%s' % (namenodes[0], hdfs_port),
        # r'## resourcemanager_host=localhost': 'resourcemanager_host=%s' % yarn_resmgr.split(':')[0],
        r'.*resourcemanager_host=localhost':
        'resourcemanager_host=%s' % resourcemanagers[0],
        # r'## resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_resmgr.split(':')[1],
        r'.*resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_port,
        r'.*webhdfs_url=http://localhost:50070/webhdfs/v1':
        'webhdfs_url=http://%s:50070/webhdfs/v1' % namenodes[0],
        r'.*history_server_api_url=http://localhost:19888':
        'history_server_api_url=%s' % yarn_log_url.split('/')[0],
        r'.*resourcemanager_api_url=http://localhost:8088':
        'resourcemanager_api_url=http://%s:8088' % yarn_resmgr.split(':')[0],
        r'.*secret_key=.*': 'secret_key=%s' % uuid.uuid4()
    })

    self.update_apps()

def trigger_bigtop(self):
    '''
    Trigger the Bigtop puppet recipe that handles the Zeppelin service.
    '''
    bigtop = Bigtop()
    overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                       strip=True)

    # The zep deb depends on spark-core which unfortunately brings in
    # most of hadoop. Include appropriate roles here to ensure these
    # packages are configured in the same way as our other Bigtop
    # software deployed with puppet.
    bigtop.render_site_yaml(
        roles=[
            'spark-client',
            'spark-yarn-slave',
            'zeppelin-server',
        ],
        overrides=overrides,
    )

    # NB: during an upgrade, we configure the site.yaml, but do not
    # trigger puppet. The user must do that with the 'reinstall' action.
    if unitdata.kv().get('zeppelin.version.repo', False):
        hookenv.log("An upgrade is available and the site.yaml has been "
                    "configured. Run the 'reinstall' action to continue.",
                    level=hookenv.INFO)
    else:
        ####################################################################
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf',
                     zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ####################################################################

        bigtop.trigger_puppet()
        self.wait_for_api(30)

        ####################################################################
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)

def configure_hive(self):
    '''
    Called during config-changed events
    '''
    config = hookenv.config()
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HADOOP_HEAPSIZE *=.*':
        'export HADOOP_HEAPSIZE=%s' % config['heap'],
    })

def install(self):
    '''
    Perform initial one-time setup, workaround upstream bugs, and
    trigger puppet.
    '''
    # Dirs are handled by the bigtop deb, so no need to call out to
    # dist_config to do that work. However, we want to adjust the
    # groups for the `ubuntu` user for better interaction with Juju.
    self.dist_config.add_users()

    # Set ports based on layer.yaml options
    self._add_override('zeppelin::server::server_port',
                       self.dist_config.port('zeppelin'))
    self._add_override('zeppelin::server::web_socket_port',
                       self.dist_config.port('zeppelin_web'))

    # Default spark to local mode on initial install. This will be
    # reconfigured if/when hadoop or spark relations are made.
    self._add_override('zeppelin::server::spark_master_url', 'local[*]')

    ##########
    # BUG: BIGTOP-2742
    # Default zeppelin init script looks for the literal '$(hostname)'
    # string. Symlink it so it exists before the apt install from puppet
    # tries to start the service.
    import subprocess
    host = subprocess.check_output(['hostname']).decode('utf8').strip()
    zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
    utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
    utils.run_as('root', 'ln', '-sf',
                 zepp_pid,
                 '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
    ##########

    self.trigger_bigtop()

    ##########
    # BUG: BIGTOP-2742
    # Puppet apply will call systemctl daemon-reload, which removes the
    # symlink we just created. Now that the bits are on disk, update the
    # init script $(hostname) that caused this mess to begin with.
    zepp_init_script = '/etc/init.d/zeppelin'
    utils.re_edit_in_place(zepp_init_script, {
        r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
    })
    utils.run_as('root', 'systemctl', 'daemon-reload')
    self.restart()
    self.wait_for_api(30)
    ##########

    ##########
    # BUG: BIGTOP-2154
    # The zep deb depends on spark-core and spark-python. However, because
    # of the unholy requirement to have hive tightly coupled to spark,
    # we need to ensure spark-datanucleus is installed. Do this after the
    # initial install so the bigtop repo is available to us.
    utils.run_as('root', 'apt-get', 'install', '-qy', 'spark-datanucleus')

def configure_yarn_mode(self):
    # put the spark jar in hdfs
    spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
        self.dist_config.path('spark')))[0]
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/share/lib')
    try:
        utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                     '/user/ubuntu/share/lib/spark-assembly.jar')
    except CalledProcessError:
        pass  # jar already in HDFS from another Spark
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

    # create hdfs storage space for history server
    dc = self.dist_config
    prefix = dc.path('log_prefix')
    events_dir = dc.path('spark_events')
    events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 events_dir)

    # create hdfs storage space for spark-bench
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/spark-bench')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/spark-bench')

    # ensure user-provided Hadoop works
    hadoop_classpath = utils.run_as('hdfs', 'hadoop', 'classpath',
                                    capture_output=True)
    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    utils.re_edit_in_place(spark_env, {
        r'.*SPARK_DIST_CLASSPATH.*':
        'SPARK_DIST_CLASSPATH={}'.format(hadoop_classpath),
    }, append_non_matches=True)

    # update spark-defaults
    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    etc_env = utils.read_etc_env()
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
    }, append_non_matches=True)

    unitdata.kv().set('hdfs.available', True)
    unitdata.kv().flush(True)

def set_advertise(self):
    short_host = check_output(['hostname', '-s']).decode('utf8').strip()

    # Configure server.properties
    # NB: We set the advertised.host.name below to our short hostname
    # to kafka (admin will still have to expose kafka and ensure the
    # external client can resolve the short hostname to our public ip).
    kafka_server_conf = '/etc/kafka/conf/server.properties'
    utils.re_edit_in_place(kafka_server_conf, {
        r'^#?advertised.host.name=.*':
        'advertised.host.name=%s' % short_host,
    })

def zookeeper_removed():
    hookenv.status_set('maintenance', 'Removing Apache NiFi from cluster')
    re_edit_in_place(
        '{}/files/nifi-1.1.1/conf/nifi.properties'.format(hookenv.charm_dir()),
        {r'.*nifi.cluster.is.node.*': 'nifi.cluster.is.node=false'})
    hookenv.close_port(hookenv.config()['cluster-port'])
    if service_restart('nifi'):
        remove_state('apache-nifi.cluster')
        hookenv.status_set('active', 'Running: standalone mode')
    else:
        # NB: 'error' is not a valid workload state; use 'blocked'
        hookenv.status_set('blocked', 'Failed to restart')

def config_bindings():
    try:
        subprocess.check_call(['service', 'neo4j', 'stop'])
    except subprocess.CalledProcessError as exception:
        hookenv.log(exception.output)
    utils.re_edit_in_place('/etc/neo4j/neo4j.conf', {
        r'#dbms.connector.http.address=0.0.0.0:7474':
        'dbms.connector.http.address=0.0.0.0:7474',
    })
    service_start('neo4j')
    hookenv.status_set('active', 'Ready')
    set_state('neo4j.installed')

def init_fw():
    # this value has to be changed to set ufw rules
    utils.re_edit_in_place('/etc/default/ufw', {
        r'IPV6=yes': 'IPV6=no',
    })
    if config('firewall_enabled'):
        sp.check_call(['ufw', 'allow', '22'])
        sp.check_output(['ufw', 'enable'],
                        input='y\n',
                        universal_newlines=True)
    else:
        sp.check_output(['ufw', 'disable'])

def init_fw():
    conf = config()
    utils.re_edit_in_place('/etc/default/ufw', {
        r'IPV6=yes': 'IPV6=no',
    })
    if conf['firewall-enabled']:
        subprocess.check_call(['ufw', 'allow', '22'])
        subprocess.check_output(['ufw', 'enable'],
                                input='y\n',
                                universal_newlines=True)
    else:
        subprocess.check_output(['ufw', 'disable'])

def setup_puppet_config(self, NN, RM):
    # generate site.yaml. Something like this would do
    hiera_dst = self.options.get('bigtop_hiera_path')
    hiera_conf = self.options.get('bigtop_hiera_config')
    hiera_site_yaml = self.options.get('bigtop_hiera_siteyaml')
    bigtop_site_yaml = "{0}/{1}/{2}".format(self.bigtop_dir,
                                            self.bigtop_version,
                                            hiera_site_yaml)
    self.prepare_bigtop_config(bigtop_site_yaml, NN, RM)
    # Now copy hiera.yaml to /etc/puppet & point hiera to use the above
    # location as hieradata directory
    Path("{0}/{1}/{2}".format(self.bigtop_dir,
                              self.bigtop_version,
                              hiera_conf)).copy(hiera_dst)
    utils.re_edit_in_place(hiera_dst, {
        r'.*:datadir.*': " :datadir: {0}/".format(
            os.path.dirname(bigtop_site_yaml)),
    })

def setup_kafka_config(self):
    '''
    copy the default configuration files to kafka_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('kafka') / 'config'
    kafka_conf = self.dist_config.path('kafka_conf')
    kafka_conf.rmtree_p()
    default_conf.copytree(kafka_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    # dir. we've seen issues where kafka still looks for config in
    # KAFKA_HOME/config.
    default_conf.rmtree_p()
    kafka_conf.symlink(default_conf)

    # Configure immutable bits
    kafka_bin = self.dist_config.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if kafka_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], kafka_bin])
        env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

    # note: we set the advertised.host.name below to the public_address
    # to ensure that external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_ip = utils.resolve_private_address(
        hookenv.unit_get('private-address'))
    kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
    service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    utils.re_edit_in_place(kafka_server_conf, {
        r'^broker.id=.*': 'broker.id=%s' % unit_num,
        r'^port=.*': 'port=%s' % self.dist_config.port('kafka'),
        r'^log.dirs=.*':
        'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
        r'^#?advertised.host.name=.*':
        'advertised.host.name=%s' % public_address,
    })

    kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
    utils.re_edit_in_place(kafka_log4j, {
        r'^kafka.logs.dir=.*':
        'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
    })

    # fix for lxc containers and some corner cases in manual provider
    # ensure that public_address is resolvable internally by mapping it
    # to the private IP
    utils.update_kv_host(private_ip, public_address)
    utils.manage_etc_hosts()

def setup_kafka_config(self):
    '''
    copy the default configuration files to kafka_conf property
    defined in dist.yaml
    '''
    default_conf = self.dist_config.path('kafka') / 'config'
    kafka_conf = self.dist_config.path('kafka_conf')
    kafka_conf.rmtree_p()
    default_conf.copytree(kafka_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    # dir. we've seen issues where kafka still looks for config in
    # KAFKA_HOME/config.
    default_conf.rmtree_p()
    kafka_conf.symlink(default_conf)

    # Configure immutable bits
    kafka_bin = self.dist_config.path('kafka') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if kafka_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], kafka_bin])
        env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

    # note: we set the advertised.host.name below to the public_address
    # to ensure that external (non-Juju) clients can connect to Kafka
    public_address = hookenv.unit_get('public-address')
    private_ip = utils.resolve_private_address(
        hookenv.unit_get('private-address'))
    kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
    service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
    utils.re_edit_in_place(kafka_server_conf, {
        r'^broker.id=.*': 'broker.id=%s' % unit_num,
        r'^port=.*': 'port=%s' % self.dist_config.port('kafka'),
        r'^log.dirs=.*':
        'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
        r'^#?advertised.host.name=.*':
        'advertised.host.name=%s' % public_address,
    })

    kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
    utils.re_edit_in_place(kafka_log4j, {
        r'^kafka.logs.dir=.*':
        'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
    })

    # fix for lxc containers and some corner cases in manual provider
    # ensure that public_address is resolvable internally by mapping it
    # to the private IP
    utils.update_etc_hosts({private_ip: public_address})

    templating.render(
        'upstart.conf',
        '/etc/init/kafka.conf',
        context={
            'kafka_conf': self.dist_config.path('kafka_conf'),
            'kafka_bin': '{}/bin'.format(self.dist_config.path('kafka'))
        },
    )

def configure_notebook(self):
    # profile config created during install
    ipython_profile = "ipython_notebook_config.py"
    # find path to ipython_notebook_config.py
    pPath = "/home/ubuntu/.ipython/profile_pyspark"
    cmd = ['find', pPath, '-name', ipython_profile]
    profile_config = check_output(cmd, universal_newlines=True).strip()

    # update profile with standard opts and configured port
    port = self.dist_config.port('notebook')
    notebooks_dir = self.dist_config.path('notebooks')
    utils.re_edit_in_place(profile_config, {
        r'.*c.NotebookApp.ip *=.*': 'c.NotebookApp.ip = "*"',
        r'.*c.NotebookApp.open_browser *=.*':
        'c.NotebookApp.open_browser = False',
        r'.*c.NotebookApp.port *=.*': 'c.NotebookApp.port = {}'.format(port),
        r'.*c.NotebookManager.notebook_dir *=.*':
        "c.NotebookManager.notebook_dir = u'{}'".format(notebooks_dir),
    })

    spark_home = os.environ.get("SPARK_HOME", '/usr/lib/spark')
    py4j = "py4j-0.*.zip"
    cmd = "find {} -name {}".format(spark_home, py4j)
    # TODO: handle missing py4j
    py4j_path = check_output(cmd.split(), universal_newlines=True).strip()

    setup_source = 'scripts/00-pyspark-setup.py'
    Path(setup_source).chmod(0o755)
    Path(setup_source).chown('ubuntu', 'hadoop')
    utils.re_edit_in_place(setup_source, {
        r'py4j *=.*': 'py4j="{}"'.format(py4j_path),
    })
    home = Path(os.environ.get('HOME', '/home/ubuntu'))
    profile_dir = home / '.ipython/profile_pyspark'
    setup_target = profile_dir / 'startup/00-pyspark-setup.py'
    Path(setup_source).copy2(setup_target)

    # Our spark charm defaults to yarn-client, so that should
    # be a safe default here in case MASTER isn't set. Update the env
    # with our spark mode and py4j location.
    spark_mode = os.environ.get("MASTER", "yarn-client")
    spark_home = Path(os.environ.get("SPARK_HOME", "/usr/lib/spark"))
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['PYSPARK_DRIVER_PYTHON_OPTS'] = "notebook"
        env['PYSPARK_SUBMIT_ARGS'] = "--master " + spark_mode
        env['PYTHONPATH'] = spark_home / py4j_path

def configure(self, mode):
    livy_conf = self.dist_config.path('livy') / 'conf/livy-defaults.conf'
    if not livy_conf.exists():
        (self.dist_config.path('livy') /
         'conf/livy-defaults.conf.template').copy(livy_conf)
    etc_conf = self.dist_config.path('livy_conf') / 'livy-defaults.conf'
    if not etc_conf.exists():
        livy_conf.symlink(etc_conf)

    if mode == 'yarn-client':
        spark_mode = 'yarn'
    else:
        spark_mode = 'process'
    utils.re_edit_in_place(livy_conf, {
        r'.*livy.server.session.factory =*.*':
        ' livy.server.session.factory = ' + spark_mode,
    })

def test_re_edit_in_place(self):
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    tmp_file = Path(filename)
    try:
        tmp_file.write_text('foo\nbar\nqux')
        utils.re_edit_in_place(tmp_file, {
            r'oo$': 'OO',
            r'a': 'A',
            r'^qux$': 'QUX',
        })
        self.assertEqual(tmp_file.text(), 'fOO\nbAr\nQUX')
    finally:
        tmp_file.remove()

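The test above pins down the core contract of utils.re_edit_in_place: each pattern is applied line by line and matching lines are rewritten in place. Several callers in this listing also pass append_non_matches=True, so patterns that match no line have their replacement appended as a new line (this is how install_java guarantees a JAVA_HOME= line ends up in /etc/environment whether or not one existed before). A minimal sketch consistent with that behaviour, offered as an illustration rather than the actual charm-helpers code:

import re


def re_edit_in_place(path, subs, append_non_matches=False):
    # Apply each {pattern: replacement} pair to every line of the file.
    with open(path) as f:
        lines = f.read().splitlines()
    matched = set()
    for i in range(len(lines)):
        for pattern, replacement in subs.items():
            new_line, n = re.subn(pattern, replacement, lines[i])
            if n:
                matched.add(pattern)
            lines[i] = new_line
    # Optionally append replacements whose pattern never matched a line.
    if append_non_matches:
        lines.extend(r for p, r in subs.items() if p not in matched)
    with open(path, 'w') as f:
        f.write('\n'.join(lines))
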
def configure_hadoop_libs(self):
    if unitdata.kv().get('hadoop.extra.installed', False):
        return

    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    etc_env = utils.read_etc_env()
    hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.driver.extraClassPath .*':
        'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
        r'.*spark.jars .*': 'spark.jars {}'.format(hadoop_extra_classpath),
    }, append_non_matches=True)

    unitdata.kv().set('hadoop.extra.installed', True)
    unitdata.kv().flush(True)

def disable_yarn_mode(self):
    # point SPARK_JAR back at the local assembly jar
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['SPARK_JAR'] = glob('{}/lib/spark-assembly-*.jar'.format(
            self.dist_config.path('spark')))[0]

    # update spark-defaults
    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
    }, append_non_matches=True)

    unitdata.kv().set('hdfs.available', False)
    unitdata.kv().flush(True)

def configure_local_db(self):
    local_url = ('jdbc:derby:;databaseName='
                 '/var/lib/hive/metastore/metastore_db;create=true')
    local_driver = 'org.apache.derby.jdbc.EmbeddedDriver'
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = local_url
        props['javax.jdo.option.ConnectionUserName'] = '******'
        props['javax.jdo.option.ConnectionPassword'] = '******'
        props['javax.jdo.option.ConnectionDriverName'] = local_driver

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*': '# export HIVE_AUX_JARS_PATH=',
    })

def install():
    hookenv.log('Installing neo4j')
    config = hookenv.config()
    hookenv.open_port(config['port'])
    fetch.configure_sources(True)
    fetch.apt_install(fetch.filter_installed_packages(['neo4j']))
    utils.re_edit_in_place('/etc/neo4j/neo4j-server.properties', {
        r'#org.neo4j.server.webserver.address=0.0.0.0':
        'org.neo4j.server.webserver.address=0.0.0.0',
    })
    # utils.re_edit_in_place('/etc/security/limits.conf', {
    #     r'#org.neo4j.server.webserver.address=127.0.0.1':
    #     'org.neo4j.server.webserver.address=0.0.0.0',
    # })
    service_restart('neo4j-service')
    set_state('neo4j.installed')

def zookeeper_removed():
    filesdir = '{}/files'.format(hookenv.charm_dir())
    hookenv.status_set('maintenance', 'Removing Apache NiFi from cluster')
    re_edit_in_place(
        '{}/files/nifi-1.3.0/conf/nifi.properties'.format(hookenv.charm_dir()),
        {r'.*nifi.cluster.is.node.*': 'nifi.cluster.is.node=false'})
    hookenv.close_port(hookenv.config()['cluster-port'])
    try:
        subprocess.check_call(
            ['bash', '{}/nifi-1.3.0/bin/nifi.sh'.format(filesdir), 'restart'])
        hookenv.status_set('active', 'Running: standalone mode')
        set_state('apache-nifi.installed')
    except subprocess.CalledProcessError:
        hookenv.status_set('blocked', 'Failed to restart')

def configure_yarn_mode(self):
    # put the spark jar in hdfs
    spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
        self.dist_config.path('spark')))[0]
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/share/lib')
    try:
        utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                     '/user/ubuntu/share/lib/spark-assembly.jar')
    except CalledProcessError:
        pass  # jar already in HDFS from another Spark
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

    # create hdfs storage space for history server
    dc = self.dist_config
    prefix = dc.path('log_prefix')
    events_dir = dc.path('spark_events')
    events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 events_dir)

    # create hdfs storage space for spark-bench
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/spark-bench')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/spark-bench')

    # ensure user-provided Hadoop works
    hadoop_classpath = utils.run_as('hdfs', 'hadoop', 'classpath',
                                    capture_output=True)
    spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
    utils.re_edit_in_place(spark_env, {
        r'.*SPARK_DIST_CLASSPATH.*':
        'SPARK_DIST_CLASSPATH={}'.format(hadoop_classpath),
    }, append_non_matches=True)

    # update spark-defaults
    spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
    etc_env = utils.read_etc_env()
    hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
    utils.re_edit_in_place(spark_conf, {
        r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
        r'.*spark.driver.extraClassPath .*':
        'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
    }, append_non_matches=True)

    unitdata.kv().set('hdfs.available', True)
    unitdata.kv().flush(True)

def update_bind_address(self):
    """
    Possibly update network interface bindings
    """
    network_interface = config().get('network_interface')
    if network_interface:
        network_interface = get_ip_for_interface(network_interface)
        zookeeper_cfg = "{}/zoo.cfg".format(
            self.dist_config.path('zookeeper_conf'))
        utils.re_edit_in_place(zookeeper_cfg, {
            r'^clientPortAddress.*':
            'clientPortAddress={}'.format(network_interface)
        }, append_non_matches=True)