def update_peers(self, node_list):
        '''
        This method will return True if the master peer was updated,
        False otherwise.
        '''
        old_master = unitdata.kv().get('spark_master.ip', 'not_set')
        master_ip = ''
        if not node_list:
            hookenv.log("No peers yet. Acting as master.")
            master_ip = utils.resolve_private_address(hookenv.unit_private_ip())
            nodes = [(hookenv.local_unit(), master_ip)]
            unitdata.kv().set('spark_all_master.ips', nodes)
            unitdata.kv().set('spark_master.ip', master_ip)
        else:
            # Use the node with the minimum id as master.
            # Any ordering is fine here; lexicographical ordering works too.
            node_list.sort()
            master_ip = utils.resolve_private_address(node_list[0][1])
            unitdata.kv().set('spark_master.ip', master_ip)
            unitdata.kv().set('spark_all_master.ips', node_list)
            hookenv.log("Updating master ip to {}.".format(master_ip))

        unitdata.kv().set('spark_master.is_set', True)
        unitdata.kv().flush(True)
        # In case of an HA setup, adding peers must be treated as a potential
        # master change
        if (old_master != master_ip) or unitdata.kv().get('zookeepers.available', False):
            return True
        else:
            return False
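All of these examples lean on the same helper, utils.resolve_private_address, whose body is not shown on this page. As a rough, assumption-based sketch only (not the actual jujubigdata implementation), the helper can be pictured as returning IPv4 literals unchanged and otherwise falling back to a forward DNS lookup:

import re
import socket

def resolve_private_address(addr):
    """Illustrative sketch only: return `addr` if it already looks like an
    IPv4 address, otherwise try to resolve it via DNS. The real helper in
    jujubigdata.utils may differ in details and error handling."""
    ipv4 = re.compile(r'^\d{1,3}(\.\d{1,3}){3}$')
    if ipv4.match(addr):
        return addr                        # already an IP; nothing to resolve
    try:
        return socket.gethostbyname(addr)  # forward DNS lookup
    except socket.error:
        return addr                        # fall back to the original value

Callers such as update_peers above only care that the result is a plain IP string they can store, compare, or join into connect strings.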
Example #2
    def update_peers(self, node_list):
        '''
        This method will return True if the master peer was updated,
        False otherwise.
        '''
        old_master = unitdata.kv().get('spark_master.ip', 'not_set')
        master_ip = ''
        if not node_list:
            hookenv.log("No peers yet. Acting as master.")
            master_ip = utils.resolve_private_address(
                hookenv.unit_private_ip())
            nodes = [(hookenv.local_unit(), master_ip)]
            unitdata.kv().set('spark_all_master.ips', nodes)
            unitdata.kv().set('spark_master.ip', master_ip)
        else:
            # Use the node with the minimum id as master.
            # Any ordering is fine here; lexicographical ordering works too.
            node_list.sort()
            master_ip = utils.resolve_private_address(node_list[0][1])
            unitdata.kv().set('spark_master.ip', master_ip)
            unitdata.kv().set('spark_all_master.ips', node_list)
            hookenv.log("Updating master ip to {}.".format(master_ip))

        unitdata.kv().set('spark_master.is_set', True)
        unitdata.kv().flush(True)
        # In case of an HA setup, adding peers must be treated as a potential
        # master change
        if (old_master != master_ip) or unitdata.kv().get(
                'zookeepers.available', False):
            return True
        else:
            return False
Example #3
 def hosts_map(self):
     local_host_name = hookenv.local_unit().replace('/', '-')
     local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
     result = {local_ip: local_host_name}
     for conv in self.conversations():
         addr = conv.get_remote('private-address', '')
         ip = utils.resolve_private_address(addr)
         host_name = list(conv.units)[0].replace('/', '-')
         result.update({ip: host_name})
     return result
    def trigger_puppet(self):
        # If we can't reverse resolve the hostname (like on azure), support DN
        # registration by IP address.
        # NB: determine this *before* updating /etc/hosts below since
        # gethostbyaddr will not fail if we have an /etc/hosts entry.
        reverse_dns_bad = False
        try:
            socket.gethostbyaddr(utils.resolve_private_address(hookenv.unit_private_ip()))
        except socket.herror:
            reverse_dns_bad = True
        # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
        # java to do a hostname lookup on clouds that have >64 char fqdns
        # (gce). Force short hostname (< 64 char) into /etc/hosts as workaround.
        # Better fix may be to move to java8. See http://paste.ubuntu.com/16230171/
        # NB: do this before the puppet apply, which may call java stuffs
        # like format namenode, which will fail if we don't get this fix
        # down early.
        short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
        private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        if short_host and private_ip:
            utils.update_kv_host(private_ip, short_host)
            utils.manage_etc_hosts()

        charm_dir = hookenv.charm_dir()
        # TODO JIRA KWM: rm does not need Hdfs_init and will fail
        rm_patch = Path(charm_dir) / 'resources/patch1_rm_init_hdfs.patch'
        # TODO JIRA KWM: nm should not *need* mapred role. we could patch it
        # with nm_patch, or adjust nm charm to include mapred role. for now,
        # we're doing the latter. todo rfc from dev@bigtop list.
        # nm_patch = Path(charm_dir) / 'resources/patch2_nm_core-site.patch'
        # TODO JIRA KWM: client role needs common_yarn for yarn-site.xml
        client_patch = Path(charm_dir) / 'resources/patch3_client_role_use_common_yarn.patch'
        with chdir("{}".format(self.bigtop_base)):
            # rm patch goes first
            utils.run_as('root', 'patch', '-p1', '-s', '-i', rm_patch)
            # skip nm_patch for now since nm charm is including mapred role
            # utils.run_as('root', 'patch', '-p1', '-s', '-i', nm_patch)
            # client patch goes last
            utils.run_as('root', 'patch', '-p1', '-s', '-i', client_patch)
        # TODO FIX ABOVE KWM

        # puppet apply needs to be run where the recipes were unpacked
        with chdir("{}".format(self.bigtop_base)):
            utils.run_as('root', 'puppet', 'apply', '-d',
                         '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                         'bigtop-deploy/puppet/manifests/site.pp')

        # Do any post-puppet config on the generated config files.
        if reverse_dns_bad:
            hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
            with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
                props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'false'
Example #5
    def configure_kafka(self, zk_units):
        # Get ip:port data from our connected zookeepers
        if not zk_units:
            # if we have no zookeepers, make sure kafka is stopped
            self.stop()
        else:
            zks = []
            for remote_address, port in zk_units:
                ip = utils.resolve_private_address(remote_address)
                zks.append("%s:%s" % (ip, port))
            zks.sort()
            zk_connect = ",".join(zks)

            # update consumer props
            cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })

            # update server props
            cfg = self.dist_config.path('kafka_conf') / 'server.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })
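Another recurring helper here is utils.re_edit_in_place(path, substitutions), which is also not defined on this page. A minimal sketch of what such a helper might do (an illustration under assumptions, not the real jujubigdata code, which also supports extras such as the append_non_matches flag used in a later example):

import re

def re_edit_in_place(filename, subs):
    """Sketch: apply each regex -> replacement pair to every line of the
    file and write the result back in place. Illustrative only."""
    with open(filename) as f:
        lines = f.read().splitlines()
    edited = []
    for line in lines:
        for pattern, replacement in subs.items():
            line = re.sub(pattern, replacement, line)
        edited.append(line)
    with open(filename, 'w') as f:
        f.write('\n'.join(edited) + '\n')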
Example #6
 def hosts_map(self):
     result = {}
     for conv in self.conversations():
         ip = utils.resolve_private_address(conv.get_remote('private-address', ''))
         host_name = conv.scope.replace('/', '-')
         result.update({ip: host_name})
     return result
Example #7
    def configure_kafka(self, zk_units, network_interface=None):
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        kafka_port = self.dist_config.port('kafka')

        roles = ['kafka-server']
        override = {
            'kafka::server::broker_id': unit_num,
            'kafka::server::port': kafka_port,
            'kafka::server::zookeeper_connection_string': zk_connect,
        }
        if network_interface:
            ip = Bigtop().get_ip_for_interface(network_interface)
            override['kafka::server::bind_addr'] = ip

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()
        self.set_advertise()
        self.restart()
Example #8
    def configure_kafka(self, zk_units, network_interface=None):
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)

        # update consumer props
        cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
        utils.re_edit_in_place(
            cfg, {
                r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
            })

        # update server props
        cfg = self.dist_config.path('kafka_conf') / 'server.properties'
        utils.re_edit_in_place(
            cfg, {
                r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
            })

        # Possibly bind a network interface
        if network_interface:
            utils.re_edit_in_place(
                cfg, {
                    r'^#?host.name=.*':
                    'host.name={}'.format(
                        get_ip_for_interface(network_interface)),
                })
    def configure_kafka(self, zk_units, network_interface=None):
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)

        # update consumer props
        cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

        # update server props
        cfg = self.dist_config.path('kafka_conf') / 'server.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

        # Possibly bind a network interface
        if network_interface:
            utils.re_edit_in_place(cfg, {
                r'^#?host.name=.*': 'host.name={}'.format(
                    get_ip_for_interface(network_interface)),
            })
Example #10
 def am_i_registered(self):
     my_ip = utils.resolve_private_address(
         hookenv.unit_get('private-address'))
     my_hostname = hookenv.local_unit().replace('/', '-')
     unit, data = any_ready_unit(self.relation_name)
     etc_hosts = json.loads((data or {}).get('etc_hosts', '{}'))
     return etc_hosts.get(my_ip, None) == my_hostname
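For context, am_i_registered expects some remote unit to have published an etc_hosts JSON map on the relation. The publishing side is not shown on this page; a hypothetical sketch (the class and method names are invented, and it assumes a hosts_map() like the one in Example #3 plus charmhelpers' relation_set) might look like:

import json
from charmhelpers.core import hookenv

class HostsRelation(object):
    # Hypothetical stand-in for the relation class that defines
    # am_i_registered() and hosts_map() in the examples above.
    def publish_etc_hosts(self):
        # Advertise this unit's view of the ip -> hostname map so peers
        # can verify their own registration.
        hookenv.relation_set(relation_settings={
            'etc_hosts': json.dumps(self.hosts_map()),
        })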
Example #11
def client_present(client):
    if is_state('leadership.is_leader'):
        client.set_spark_started()
        spark = Spark()
        master_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        master_url = spark.get_master_url(master_ip)
        client.send_master_info(master_url, master_ip)
Example #12
 def get_zk_connect(self, zk_units):
     zks = []
     for unit in zk_units:
         ip = utils.resolve_private_address(unit['host'])
         zks.append(ip)
     zks.sort()
     return ",".join(zks)
Example #13
    def configure_kafka(self, zk_units, network_interface=None):
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        kafka_port = self.dist_config.port('kafka')

        roles = ['kafka-server']
        override = {
            'kafka::server::broker_id': unit_num,
            'kafka::server::port': kafka_port,
            'kafka::server::zookeeper_connection_string': zk_connect,
        }
        if network_interface:
            ip = Bigtop().get_ip_for_interface(network_interface)
            override['kafka::server::bind_addr'] = ip

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()
        self.set_advertise()
        self.restart()
Example #14
 def get_zk_connect(self, zk_units):
     zks = []
     for unit in zk_units:
         ip = utils.resolve_private_address(unit['host'])
         zks.append(ip)
     zks.sort()
     return ",".join(zks)
Example #15
    def configure_kafka(self):
        # Get ip:port data from our connected zookeepers
        if Zookeeper().connected_units() and Zookeeper().is_ready():
            zks = []
            for unit, data in Zookeeper().filtered_data().items():
                ip = utils.resolve_private_address(data['private-address'])
                zks.append("%s:%s" % (ip, data['port']))
            zks.sort()
            zk_connect = ",".join(zks)

            # update consumer props
            cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })

            # update server props
            cfg = self.dist_config.path('kafka_conf') / 'server.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })
        else:
            # if we have no zookeepers, make sure kafka is stopped
            self.stop()
Example #16
 def nodes(self):
     return [{
         'host':
         conv.scope.replace('/', '-'),
         'ip':
         utils.resolve_private_address(
             conv.get_remote('private-address', '')),
     } for conv in self.conversations()]
 def nodes(self):
     return [
         {
             'host': conv.scope.replace('/', '-'),
             'ip': utils.resolve_private_address(conv.get_remote('private-address', '')),
         }
         for conv in self.conversations()
     ]
def configure_resourcemanager():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_resourcemanager()
    yarn.configure_jobhistory()
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('resourcemanager.configured')
Example #20
 def get_master(self):
     mode = hookenv.config()['spark_execution_mode']
     master = None
     if mode.startswith('local') or mode == 'yarn-cluster':
         master = mode
     elif mode == 'standalone':
         local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
         master = 'spark://{}:7077'.format(local_ip)
     elif mode.startswith('yarn'):
         master = 'yarn-client'
     return master
Example #21
 def get_master(self):
     mode = hookenv.config()['spark_execution_mode']
     master = None
     if mode.startswith('local') or mode == 'yarn-cluster':
         master = mode
     elif mode == 'standalone':
         local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
         master = 'spark://{}:7077'.format(local_ip)
     elif mode.startswith('yarn'):
         master = 'yarn-client'
     return master
Example #22
    def setup_kafka_config(self):
        '''
        Copy the default configuration files to the kafka_conf property
        defined in dist.yaml.
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        # dir. We've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Configure immutable bits
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(
            hookenv.unit_get('private-address'))
        kafka_server_conf = self.dist_config.path(
            'kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(
            kafka_server_conf, {
                r'^broker.id=.*':
                'broker.id=%s' % unit_num,
                r'^port=.*':
                'port=%s' % self.dist_config.port('kafka'),
                r'^log.dirs=.*':
                'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
                r'^#?advertised.host.name=.*':
                'advertised.host.name=%s' % public_address,
            })

        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(
            kafka_log4j, {
                r'^kafka.logs.dir=.*':
                'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
            })

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_kv_host(private_ip, public_address)
        utils.manage_etc_hosts()
def restart_services(spark):
    master_info = spark.get_master_info()
    master_url = master_info['connection_string']
    if data_changed('insightedge.master_url', master_url):
        master_ip = master_info['master']
        local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        is_master = master_ip == local_ip
        stop_datagrid_services()
        start_datagrid_services(master_url, is_master, not is_master
                                or not spark.is_scaled())
    set_state('insightedge.ready')
    hookenv.status_set('active', 'ready')
Example #24
    def setup_kafka_config(self):
        '''
        Copy the default configuration files to the kafka_conf property
        defined in dist.yaml.
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        # dir. We've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Configure immutable bits
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(kafka_server_conf, {
            r'^broker.id=.*': 'broker.id=%s' % unit_num,
            r'^port=.*': 'port=%s' % self.dist_config.port('kafka'),
            r'^log.dirs=.*': 'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
            r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
        })

        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(kafka_log4j, {
            r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
        })

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_etc_hosts({private_ip: public_address})

        templating.render(
            'upstart.conf',
            '/etc/init/kafka.conf',
            context={
                'kafka_conf': self.dist_config.path('kafka_conf'),
                'kafka_bin': '{}/bin'.format(self.dist_config.path('kafka'))
            },
        )
Example #25
def configure_namenode():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')
def configure_namenode():
    local_hostname = hookenv.local_unit().replace("/", "-")
    private_address = hookenv.unit_get("private-address")
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports("namenode")
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state("namenode.started")
Example #27
    def configure_hosts_file(self):
        """
        Add the unit's private-address to /etc/hosts to ensure that Java
        can resolve the hostname of the server to its real IP address.
        We derive our hostname from the unit_id, replacing / with -.
        """
        local_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        hostname = hookenv.local_unit().replace('/', '-')
        utils.update_etc_hosts({local_ip: hostname})

        # update name of host to more semantically meaningful value
        # (this is required on some providers; the /etc/hosts entry must match
        # the /etc/hostname lest Hadoop get confused about where certain things
        # should be run)
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
Example #28
    def configure(self, hosts, zk_units):
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit["host"])
            zks.append(ip)
        zks.sort()
        zk_connect = ",".join(zks)

        roles = ["hbase-server", "hbase-master", "hbase-client"]

        override = {
            "hadoop_hbase::common_config::zookeeper_quorum": zk_connect,
            "hadoop_hbase::deploy::auxiliary": False,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
Example #29
    def configure_ha(self, zk_units):
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))

        zk_connect = ",".join(zks)

        daemon_opts = ('-Dspark.deploy.recoveryMode=ZOOKEEPER '
                       '-Dspark.deploy.zookeeper.url={}'.format(zk_connect))

        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DAEMON_JAVA_OPTS.*': 'SPARK_DAEMON_JAVA_OPTS=\"{}\"'.format(daemon_opts),
            r'.*SPARK_MASTER_IP.*': '# SPARK_MASTER_IP',
        })
        unitdata.kv().set('zookeepers.available', True)
        unitdata.kv().flush(True)
Example #30
    def configure(self, hosts, zk_units):
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append(ip)
        zks.sort()
        zk_connect = ",".join(zks)

        roles = ['hbase-server', 'hbase-master', 'hbase-client']

        override = {
            'hadoop_hbase::common_config::zookeeper_quorum': zk_connect,
            'hadoop_hbase::deploy::auxiliary': False
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
    def trigger_puppet(self):
        """
        Trigger Puppet to install the desired components.
        """
        java_version = unitdata.kv().get('java_version', '')
        if java_version.startswith('1.7.') and len(get_fqdn()) > 64:
            # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
            # java to do a hostname lookup on clouds that have >64 char FQDNs
            # (e.g., gce). Attempt to work around this by putting the (hopefully
            # short) hostname into /etc/hosts so that it will (hopefully) be
            # used instead (see http://paste.ubuntu.com/16230171/).
            # NB: do this before the puppet apply, which may call java stuffs
            # like format namenode, which will fail if we don't get this fix
            # down early.
            short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
            private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
            if short_host and private_ip:
                utils.update_kv_host(private_ip, short_host)
                utils.manage_etc_hosts()

        # puppet args are bigtop-version dependent
        if self.bigtop_version == '1.1.0':
            puppet_args = [
                '-d',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests/site.pp'
            ]
        else:
            puppet_args = [
                '-d',
                '--parser=future',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests'
            ]

        # puppet apply runs from the root of the bigtop release source
        with chdir(self.bigtop_base):
            utils.run_as('root', 'puppet', 'apply', *puppet_args)

        # Do any post-puppet config on the generated config files.
        utils.re_edit_in_place('/etc/default/bigtop-utils', {
            r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format(
                java_home()),
        })
Example #33
    def configure_hosts_file(self):
        """
        Add the unit's private-address to /etc/hosts to ensure that Java
        can resolve the hostname of the server to its real IP address.
        We derive our hostname from the unit_id, replacing / with -.
        """
        local_ip = utils.resolve_private_address(
            hookenv.unit_get('private-address'))
        hostname = hookenv.local_unit().replace('/', '-')
        utils.update_kv_hosts({local_ip: hostname})
        utils.manage_etc_hosts()

        # update name of host to more semantically meaningful value
        # (this is required on some providers; the /etc/hosts entry must match
        # the /etc/hostname lest Hadoop get confused about where certain things
        # should be run)
        etc_hostname = Path('/etc/hostname')
        etc_hostname.write_text(hostname)
        check_call(['hostname', '-F', etc_hostname])
Example #34
    def configure_kafka(self, zk_units):
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)

        # update consumer props
        cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })

        # update server props
        cfg = self.dist_config.path('kafka_conf') / 'server.properties'
        utils.re_edit_in_place(cfg, {
            r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
        })
Example #35
    def configure_ha(self, zk_units):
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))

        zk_connect = ",".join(zks)

        daemon_opts = ('-Dspark.deploy.recoveryMode=ZOOKEEPER '
                       '-Dspark.deploy.zookeeper.url={}'.format(zk_connect))

        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(
            spark_env, {
                r'.*SPARK_DAEMON_JAVA_OPTS.*':
                'SPARK_DAEMON_JAVA_OPTS=\"{}\"'.format(daemon_opts),
                r'.*SPARK_MASTER_IP.*':
                '# SPARK_MASTER_IP',
            })
        unitdata.kv().set('zookeepers.available', True)
        unitdata.kv().flush(True)
    def check_reverse_dns(self):
        """
        Determine if reverse DNS lookups work on a machine.

        Some Hadoop services expect forward and reverse DNS to work.
        Not all clouds (e.g., Azure) offer a working reverse-DNS environment.
        Additionally, we can assume any machine with a domainname of
        'localdomain' does not have proper reverse-DNS capabilities. If either
        of these scenarios is present, set appropriate unit data so we can
        configure around this limitation.

        NB: call this *before* any /etc/hosts changes since
        gethostbyaddr will not fail if we have an /etc/hosts entry.
        """
        reverse_dns_ok = True
        if is_localdomain():
            reverse_dns_ok = False
        else:
            try:
                socket.gethostbyaddr(utils.resolve_private_address(hookenv.unit_private_ip()))
            except socket.herror:
                reverse_dns_ok = False
        unitdata.kv().set('reverse_dns_ok', reverse_dns_ok)
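The reverse_dns_ok flag stored above is typically read back after puppet has generated the Hadoop config. A sketch of such a consumer, mirroring the hdfs-site.xml edit shown in the trigger_puppet example near the top of this page (the function name is illustrative; imports are assumed to match the other examples):

from path import Path
from charmhelpers.core import unitdata
from jujubigdata import utils

def relax_datanode_registration_if_needed():
    # Sketch: if reverse DNS was found to be broken, let datanodes
    # register by IP address instead of hostname, as the earlier
    # trigger_puppet example does.
    if not unitdata.kv().get('reverse_dns_ok', True):
        hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
        with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
            props['dfs.namenode.datanode.registration.ip-hostname-check'] = \
                'false'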
Example #38
    def configure_kafka(self):
        # Get ip:port data from our connected zookeepers
        if Zookeeper().connected_units() and Zookeeper().is_ready():
            zks = []
            for unit, data in Zookeeper().filtered_data().items():
                ip = utils.resolve_private_address(data['private-address'])
                zks.append("%s:%s" % (ip, data['port']))
            zks.sort()
            zk_connect = ",".join(zks)

            # update consumer props
            cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
            })

            # update server props
            cfg = self.dist_config.path('kafka_conf') / 'server.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*': 'zookeeper.connect=%s' % zk_connect,
            })
        else:
            # if we have no zookeepers, make sure kafka is stopped
            self.stop()
Example #39
 def register_connected_hosts(self):
     for unit, data in self.unfiltered_data().items():
         ip = utils.resolve_private_address(data['private-address'])
         name = unit.replace('/', '-')
         utils.update_kv_host(ip, name)
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(
            available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']
        if mode.startswith('yarn'):
            spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
        else:
            spark_events = 'file://{}'.format(dc.path('spark_events'))

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. "
                    "Using 1g default.",
                    level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. "
                    "Using 1g default.",
                    level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Some spark applications look for envars in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
            env['SPARK_HOME'] = dc.path('spark_home')

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url':
            master_url,
            'spark::common::event_log_dir':
            spark_events,
            'spark::common::history_log_dir':
            spark_events,
            'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem':
            driver_mem,
            'spark::common::executor_mem':
            executor_mem,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet.
        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        if unitdata.kv().get('spark.version.repo', False):
            hookenv.log(
                "An upgrade is available and the site.yaml has been "
                "configured. Run the 'reinstall' action to continue.",
                level=hookenv.INFO)
        else:
            bigtop.trigger_puppet()
            self.patch_worker_master_url(master_ip, master_url)

            # Packages don't create the event dir by default. Do it each time
            # spark is (re)installed to ensure location/perms are correct.
            self.configure_events_dir(mode)

        # Handle examples and Spark-Bench. Do this each time this method is
        # called in case we need to act on a new resource or user config.
        self.configure_examples()
        self.configure_sparkbench()
 def is_master(self):
     unit_ip = utils.resolve_private_address(hookenv.unit_private_ip())
     master_ip = self.get_master_ip()
     return unit_ip == master_ip
Example #42
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            executor_mem = req_executor_mem

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = self.get_master()
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_DRIVER_MEMORY'] = driver_mem
            env['SPARK_EXECUTOR_MEMORY'] = executor_mem
            env['SPARK_HOME'] = spark_home
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # update spark config
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master *.*': 'spark.master {}'.format(self.get_master()),
            r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled true',
            r'.*spark.eventLog.dir *.*': 'spark.eventLog.dir hdfs:///user/ubuntu/directory',
            })
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*': 'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*': 'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
            r'.*SPARK_LOG_DIR.*': 'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
            r'.*SPARK_MASTER_IP.*': 'SPARK_MASTER_IP={}'.format(local_ip),
            r'.*SPARK_WORKER_DIR.*': 'SPARK_WORKER_DIR={}'.format(self.dist_config.path('spark_work')),
            })

        # manage SparkBench
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if utils.cpu_arch() == 'ppc64le':
                sb_url = hookenv.config()['spark_bench_ppc64le']
            else:
                # TODO: may need more arch cases (go with x86 sb for now)
                sb_url = hookenv.config()['spark_bench_x86_64']

            Path(sb_dir).rmtree_p()
            fetcher = ArchiveUrlFetchHandler()
            fetcher.install(sb_url, '/home/ubuntu')

            # #####
            # Handle glob if we use a .tgz that doesn't expand to sb_dir
            # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
            # SparkBench expects to live in ~/spark-bench, so put it there
            # Path(sb_archive_dir).rename(sb_dir)
            # #####

            # comment out mem tunings (let them come from /etc/environment)
            sb_env = Path(sb_dir) / 'conf/env.sh'
            utils.re_edit_in_place(sb_env, {
                r'^SPARK_DRIVER_MEMORY.*': '# SPARK_DRIVER_MEMORY (use value from environment)',
                r'^SPARK_EXECUTOR_MEMORY.*': '# SPARK_EXECUTOR_MEMORY (use value from environment)',
                })
        else:
            Path(sb_dir).rmtree_p()
Example #43
    def configure(self, available_hosts, zk_units, peers):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is ready
          * Resource manager exists aka YARN is ready

        Both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        """
        # Bootstrap spark
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()

        # Do this after our puppet bits in case puppet overrides needed perms
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip, master_url)

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            executor_mem = req_executor_mem

        spark_env = '/etc/spark/conf/spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*': 'export SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*': 'export SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
        }, append_non_matches=True)

        # Install SB (subsequent calls will reconfigure existing install)
        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
        self.install_benchmark()
Example #44
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']
        if mode.startswith('yarn'):
            spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
        else:
            spark_events = 'file://{}'.format(dc.path('spark_events'))

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. "
                            "Using 1g default.", level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. "
                            "Using 1g default.", level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Some spark applications look for envars in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
            env['SPARK_HOME'] = dc.path('spark_home')

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': spark_events,
            'spark::common::history_log_dir': spark_events,
            'spark::common::extra_lib_dirs':
                ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem': driver_mem,
            'spark::common::executor_mem': executor_mem,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet.
        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        if unitdata.kv().get('spark.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            bigtop.trigger_puppet()
            self.patch_worker_master_url(master_ip, master_url)

            # Packages don't create the event dir by default. Do it each time
            # spark is (re)installed to ensure location/perms are correct.
            self.configure_events_dir(mode)

        # Handle examples and Spark-Bench. Do this each time this method is
        # called in case we need to act on a new resource or user config.
        self.configure_examples()
        self.configure_sparkbench()
Example #45
 def register_connected_hosts(self):
     for unit, data in self.unfiltered_data().items():
         ip = utils.resolve_private_address(data['private-address'])
         name = unit.replace('/', '-')
         utils.update_kv_host(ip, name)
Example #46
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            executor_mem = req_executor_mem

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = self.get_master()
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_DRIVER_MEMORY'] = driver_mem
            env['SPARK_EXECUTOR_MEMORY'] = executor_mem
            env['SPARK_HOME'] = spark_home
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # update spark config
        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(
            spark_conf, {
                r'.*spark.master *.*':
                'spark.master {}'.format(self.get_master()),
                r'.*spark.eventLog.enabled *.*':
                'spark.eventLog.enabled true',
                r'.*spark.eventLog.dir *.*':
                'spark.eventLog.dir hdfs:///user/ubuntu/directory',
            })
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        utils.re_edit_in_place(
            spark_env, {
                r'.*SPARK_DRIVER_MEMORY.*':
                'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
                r'.*SPARK_EXECUTOR_MEMORY.*':
                'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
                r'.*SPARK_LOG_DIR.*':
                'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
                r'.*SPARK_MASTER_IP.*':
                'SPARK_MASTER_IP={}'.format(local_ip),
                r'.*SPARK_WORKER_DIR.*':
                'SPARK_WORKER_DIR={}'.format(
                    self.dist_config.path('spark_work')),
            })

        # manage SparkBench
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if utils.cpu_arch() == 'ppc64le':
                sb_url = hookenv.config()['spark_bench_ppc64le']
            else:
                # TODO: may need more arch cases (go with x86 sb for now)
                sb_url = hookenv.config()['spark_bench_x86_64']

            Path(sb_dir).rmtree_p()
            fetcher = ArchiveUrlFetchHandler()
            fetcher.install(sb_url, '/home/ubuntu')

            # #####
            # Handle glob if we use a .tgz that doesn't expand to sb_dir
            # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
            # SparkBench expects to live in ~/spark-bench, so put it there
            # Path(sb_archive_dir).rename(sb_dir)
            # #####

            # comment out mem tunings (let them come from /etc/environment)
            sb_env = Path(sb_dir) / 'conf/env.sh'
            utils.re_edit_in_place(
                sb_env, {
                    r'^SPARK_DRIVER_MEMORY.*':
                    '# SPARK_DRIVER_MEMORY (use value from environment)',
                    r'^SPARK_EXECUTOR_MEMORY.*':
                    '# SPARK_EXECUTOR_MEMORY (use value from environment)',
                })
        else:
            Path(sb_dir).rmtree_p()
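
Both memory settings above accept either an absolute value ('1g', '2048m') or a percentage of total RAM, and silently fall back to 1g when a percentage is requested outside local mode. A self-contained sketch of that conversion (total RAM in MB is passed in explicitly here instead of being read through `host.get_total_ram()`):

def mem_setting(requested, total_ram_mb, local_mode, default='1g'):
    """Translate '50%' into an absolute megabyte string, or pass absolute
    values such as '1g' or '2048m' through unchanged."""
    if not requested.endswith('%'):
        return requested
    if not local_mode:
        # A percentage of the machine only makes sense when Spark owns it.
        return default
    fraction = float(requested.rstrip('%')) / 100
    return '{}m'.format(int(total_ram_mb * fraction))

# e.g. mem_setting('50%', 8192, local_mode=True) == '4096m'
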
Example #47
    def setup_kafka_config(self):
        '''
        Copy the default configuration files to the kafka_conf property
        defined in dist.yaml.
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real
        # conf dir. We've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Similarly, we've seen issues where kafka wants to write to
        # KAFKA_HOME/logs regardless of the LOG_DIR, so make a symlink.
        default_logs = self.dist_config.path('kafka') / 'logs'
        kafka_logs = self.dist_config.path('kafka_app_logs')
        default_logs.rmtree_p()
        kafka_logs.symlink(default_logs)

        # Configure environment
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # Configure server.properties
        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        kafka_port = self.dist_config.port('kafka')
        kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(kafka_server_conf, {
            r'^broker.id=.*': 'broker.id=%s' % unit_num,
            r'^port=.*': 'port=%s' % kafka_port,
            r'^log.dirs=.*': 'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
            r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
        })

        # Configure producer.properties
        # note: we set the broker list host below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka.
        # It must match our advertised.host.name from above.
        kafka_producer_conf = self.dist_config.path('kafka_conf') / 'producer.properties'
        utils.re_edit_in_place(kafka_producer_conf, {
            r'^#?metadata.broker.list=.*': 'metadata.broker.list=%s:%s' % (public_address, kafka_port),
        })

        # Configure log properties
        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(kafka_log4j, {
            r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
        })

        template_name = 'upstart.conf'
        template_path = '/etc/init/kafka.conf'
        if host.init_is_systemd():
            template_name = 'systemd.conf'
            template_path = '/etc/systemd/system/kafka.service'

        templating.render(
            template_name,
            template_path,
            context={
                'kafka_conf': self.dist_config.path('kafka_conf'),
                'kafka_bin': '{}/bin'.format(self.dist_config.path('kafka'))
            },
        )

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_kv_host(private_ip, public_address)
        utils.manage_etc_hosts()
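
The server.properties, producer.properties, and log4j edits above all follow the same pattern: a map of full-line regexes to replacement lines, applied in place. A hypothetical sketch of that pattern, mimicking how `utils.re_edit_in_place` is used here (the real helper may differ in detail):

import re

def regex_edit_in_place(path, subs):
    """Rewrite a file line by line, replacing every line that matches one
    of the given regexes with the mapped replacement text."""
    compiled = [(re.compile(pattern), repl) for pattern, repl in subs.items()]
    with open(path) as f:
        lines = f.read().splitlines()
    for i, line in enumerate(lines):
        for pattern, repl in compiled:
            if pattern.match(line):
                lines[i] = repl
                break
    with open(path, 'w') as f:
        f.write('\n'.join(lines) + '\n')
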
Example #48
    def is_master(self):
        unit_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        master_ip = self.get_master_ip()
        return unit_ip == master_ip
Example #49
    def am_i_registered(self):
        my_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        my_hostname = hookenv.local_unit().replace('/', '-')
        unit, data = any_ready_unit(self.relation_name)
        etc_hosts = json.loads((data or {}).get('etc_hosts', '{}'))
        return etc_hosts.get(my_ip, None) == my_hostname
Example #50
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Bootstrap spark
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(
            available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            executor_mem = req_executor_mem

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()
        else:
            # Bigtop includes a default hadoop_head_node if we do not specify
            # any namenode info. To ensure spark standalone doesn't get
            # invalid hadoop config, set our NN to an empty string.
            hosts['namenode'] = ''
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
            'spark::common::extra_lib_dirs':
                ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem': driver_mem,
            'spark::common::executor_mem': executor_mem,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()

        # Do this after our puppet bits in case puppet overrides needed perms
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip, master_url)

        # Install SB (subsequent calls will reconfigure existing install)
        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
        self.install_benchmark()
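
Several of these examples funnel settings through `utils.environment_edit_in_place`, treating /etc/environment as a dict of KEY=value pairs. A minimal context-manager sketch of that idea (illustrative only; the real jujubigdata helper also deals with quoting and file permissions, which are skipped here):

import contextlib

@contextlib.contextmanager
def env_file_edit(path='/etc/environment'):
    """Expose KEY=value pairs as a dict and write any changes back on exit."""
    with open(path) as f:
        env = dict(line.strip().split('=', 1)
                   for line in f if '=' in line)
    yield env
    with open(path, 'w') as f:
        for key, value in sorted(env.items()):
            f.write('{}={}\n'.format(key, value))
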
Example #51
    def configure(self, available_hosts, zk_units, peers):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists, i.e. HDFS is available
          * Resource manager exists, i.e. YARN is ready

        Both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        """
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        hosts = {
            'spark': master_ip,
        }

        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        roles = self.get_roles()

        override = {
            'spark::common::master_url': self.get_master_url(master_ip),
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }

        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = ""

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
        # There is a race condition here.
        # The worker role will not start the first time we trigger puppet
        # apply. The exception in /var/log/spark:
        # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
        # The master url is not set at the time the worker starts for the
        # first time.
        # TODO(kjackal): investigate, debug, and submit a patch upstream.
        bigtop.trigger_puppet()
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip)

        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = self.get_master_url(master_ip)
        # Install SB (subsequent calls will reconfigure existing install)
        self.install_benchmark()
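
The back-to-back `trigger_puppet()` calls above work around the race noted in the comment: the worker can start with an empty master URL on the first apply. A hedged alternative sketch that re-runs the apply only while a caller-supplied check still reports an incomplete configuration (the `trigger` and `ready` callables are assumptions for illustration, not part of the charm):

import time

def apply_until_ready(trigger, ready, attempts=2, delay=5):
    """Run a puppet apply, repeating it while the configuration is not yet
    complete (e.g. the worker was started with an empty master URL)."""
    for _ in range(attempts):
        trigger()
        if ready():
            return True
        time.sleep(delay)
    return False

# Illustrative usage only:
# apply_until_ready(bigtop.trigger_puppet,
#                   lambda: bool(unitdata.kv().get('spark_master.ip')))
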