Example #1
0
def install_resourcemanager(namenode):
    """Install once the namenode has published its FQDN.

    Only the namenode FQDN is required for the RM install, so poll the
    namenodes() relation data whenever a namenode relation exists. This lets
    us install as early as possible, even before 'namenode.ready' is set.
    """
    nn_hosts = namenode.namenodes()
    if not nn_hosts:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing resourcemanager')
    host_map = {'namenode': nn_hosts[0], 'resourcemanager': get_fqdn()}
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts=host_map, roles='resourcemanager')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
    # to signify RM's readiness. Set our RM info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # Add our ubuntu user to the hadoop and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-resourcemanager.installed')
    hookenv.status_set('maintenance', 'resourcemanager installed')
Example #2
0
    def configure_kafka(self, zk_units, network_interface=None):
        """Render the kafka-server site.yaml and apply it with puppet.

        Builds the zookeeper connection string from the connected units,
        derives the broker id from our juju unit number, and optionally
        binds kafka to a specific network interface.
        """
        # Sorted, comma-separated ip:port pairs for the connected zookeepers.
        endpoints = sorted(
            "%s:%s" % (utils.resolve_private_address(unit['host']),
                       unit['port'])
            for unit in zk_units
        )
        zk_connect = ",".join(endpoints)

        # The juju unit number is used as the kafka broker id.
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        kafka_port = self.dist_config.port('kafka')

        override = {
            'kafka::server::broker_id': unit_num,
            'kafka::server::port': kafka_port,
            'kafka::server::zookeeper_connection_string': zk_connect,
        }
        if network_interface:
            override['kafka::server::bind_addr'] = (
                Bigtop().get_ip_for_interface(network_interface))

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=['kafka-server'], overrides=override)
        bigtop.trigger_puppet()
        self.set_advertise()
        self.restart()
Example #3
0
    def install_pig(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Pig service.

        Also sets the application version for juju status output by parsing
        'pig --version'.
        '''
        # Dirs are handled by the bigtop deb. No need to call out to
        # dist_config to do that work.
        roles = ['pig-client']

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles)
        bigtop.trigger_puppet()

        # Set app version for juju status output; pig --version looks like:
        #   Apache Pig version 0.15.0 (r: unknown)
        #   compiled Feb 06 2016, 23:00:40
        try:
            pig_out = check_output(['pig', '-x', 'local', '--version']).decode()
        except CalledProcessError as e:
            # e.output is bytes (the command was run without text mode);
            # decode so the parsing below always operates on str.
            pig_out = e.output.decode() if e.output else ''
        lines = pig_out.splitlines()
        parts = lines[0].split() if lines else []
        if len(parts) < 4:
            hookenv.log('Error getting Pig version: {}'.format(pig_out),
                        hookenv.ERROR)
            pig_ver = ''
        else:
            pig_ver = parts[3]
        hookenv.application_version_set(pig_ver)
Example #4
0
    def configure_kafka(self, zk_units, network_interface=None):
        """Render and apply the Bigtop site.yaml for a kafka-server node.

        :param list zk_units: connected zookeeper units; each is a dict
            with 'host' and 'port' keys.
        :param str network_interface: optional interface name; when given,
            kafka's bind address is set to that interface's IP.
        """
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)
        # The juju unit number is used as the kafka broker id.
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        kafka_port = self.dist_config.port('kafka')

        roles = ['kafka-server']
        override = {
            'kafka::server::broker_id': unit_num,
            'kafka::server::port': kafka_port,
            'kafka::server::zookeeper_connection_string': zk_connect,
        }
        if network_interface:
            ip = Bigtop().get_ip_for_interface(network_interface)
            override['kafka::server::bind_addr'] = ip

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()
        # Re-advertise our address and restart so the new config takes effect.
        self.set_advertise()
        self.restart()
def install_nodemanager(namenode, resourcemanager):
    """Install (or reconfigure) when both master FQDNs are available.

    Only the master FQDNs are needed for the nodemanager install, so poll
    the master host data from the relevant relations. This allows us to
    install as early as possible, even before '<master>.ready' is set.
    """
    nn_hosts = namenode.namenodes()
    rm_hosts = resourcemanager.resourcemanagers()
    masters = nn_hosts + rm_hosts
    # data_changed also records the new value, so keep it last in the guard
    # to preserve short-circuit behavior.
    if not (nn_hosts and rm_hosts and data_changed('nm.masters', masters)):
        return

    installed = is_state('apache-bigtop-nodemanager.installed')
    action = 'configuring' if installed else 'installing'
    hookenv.status_set('maintenance', '%s nodemanager' % action)
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': nn_hosts[0], 'resourcemanager': rm_hosts[0]},
        roles=['nodemanager', 'mapred-app'],
    )
    bigtop.queue_puppet()
    set_state('apache-bigtop-nodemanager.pending')
def install_namenode():
    """Install and configure the HDFS namenode via the Bigtop puppet recipe."""
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts={'namenode': get_fqdn()}, roles='namenode')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    bind_keys = (
        'dfs.namenode.rpc-bind-host',
        'dfs.namenode.servicerpc-bind-host',
        'dfs.namenode.http-bind-host',
        'dfs.namenode.https-bind-host',
    )
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        for key in bind_keys:
            props[key] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def install_resourcemanager(namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the RM install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us to
    install asap, even if 'namenode.ready' is not set yet.
    """
    if namenode.namenodes():
        hookenv.status_set('maintenance', 'installing resourcemanager')
        # Use the first reported namenode; our own FQDN is the RM host.
        nn_host = namenode.namenodes()[0]
        rm_host = get_fqdn()
        bigtop = Bigtop()
        hosts = {'namenode': nn_host, 'resourcemanager': rm_host}
        bigtop.render_site_yaml(hosts=hosts, roles='resourcemanager')
        bigtop.trigger_puppet()

        # /etc/hosts entries from the KV are not currently used for bigtop,
        # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
        # to signify RM's readiness. Set our RM info in the KV to fulfill this
        # requirement.
        utils.initialize_kv_host()

        # Add our ubuntu user to the hadoop and mapred groups.
        get_layer_opts().add_users()

        set_state('apache-bigtop-resourcemanager.installed')
        hookenv.status_set('maintenance', 'resourcemanager installed')
    else:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.

        Renders site.yaml with any overrides stored in the unit KV. If an
        upgrade repo is pending in the KV, puppet is NOT run; the user must
        run the 'reinstall' action. Otherwise puppet is applied, with a
        pre/post workaround for BIGTOP-2742 (the init script's literal
        '$(hostname)' pidfile path).
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        bigtop.render_site_yaml(
            roles=[
                'spark-client',
                'spark-yarn-slave',
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        if unitdata.kv().get('zeppelin.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            ####################################################################
            # BUG: BIGTOP-2742
            # Default zeppelin init script looks for the literal '$(hostname)'
            # string. Symlink it so it exists before the apt install from puppet
            # tries to start the service.
            import subprocess
            host = subprocess.check_output(['hostname']).decode('utf8').strip()
            zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
            utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
            utils.run_as('root', 'ln', '-sf',
                         zepp_pid,
                         '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
            ####################################################################

            bigtop.trigger_puppet()
            self.wait_for_api(30)

            ####################################################################
            # BUG: BIGTOP-2742
            # Puppet apply will call systemctl daemon-reload, which removes the
            # symlink we just created. Now that the bits are on disk, update the
            # init script $(hostname) that caused this mess to begin with.
            zepp_init_script = '/etc/init.d/zeppelin'
            utils.re_edit_in_place(zepp_init_script, {
                r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
            })
            utils.run_as('root', 'systemctl', 'daemon-reload')
            self.restart()
            self.wait_for_api(30)
Example #9
0
    def install_oozie(self):
        """Install Oozie with two sequential Bigtop puppet runs.

        The hadoop-client role is rendered and applied first; the oozie
        roles are rendered and applied in a second puppet run.
        """
        bigtop = Bigtop()
        for role_set in (['hadoop-client'], ['oozie-client', 'oozie-server']):
            bigtop.render_site_yaml(roles=role_set)
            bigtop.trigger_puppet()
    def configure(self, available_hosts):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is there
          * Resource manager exists aka YARN is ready

        both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        """
        # One-time setup, tracked in the unit KV.
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        self.install_benchmark()

        hosts = {
            'spark': available_hosts['spark-master'],
        }

        dc = self.dist_config
        # Default to local event logs; switch to HDFS when a namenode exists.
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        roles = self.get_roles()

        override = {
            'spark::common::master_url': self.get_master_url(available_hosts['spark-master']),
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
        # There is a race condition here.
        # The work role will not start the first time we trigger puppet apply.
        # The exception in /var/logs/spark:
        # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
        # The master url is not set at the time the worker start the first time.
        # TODO(kjackal): ...do the needed... (investiate,debug,submit patch)
        bigtop.trigger_puppet()
        if 'namenode' not in available_hosts:
            # Make sure users other than spark can access the events logs dir and run jobs
            utils.run_as('root', 'chmod', '777', dc.path('spark_events'))
    def install_oozie(self):
        """Install Oozie with two puppet runs.

        The hadoop-client role is applied first; the oozie roles are
        rendered and applied in a second puppet run.
        """
        roles = ['hadoop-client']

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles)
        bigtop.trigger_puppet()

        roles = ['oozie-client', 'oozie-server']

        bigtop.render_site_yaml(roles=roles)
        bigtop.trigger_puppet()
Example #12
0
def install_mahout():
    """Install mahout via the Bigtop puppet recipe and export MAHOUT_HOME."""
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['mahout-client'])
    bigtop.trigger_puppet()

    # Export MAHOUT_HOME system-wide.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    hookenv.status_set('active', 'ready')
    set_state('mahout.installed')
Example #13
0
 def trigger_bigtop(self):
     """Render the Zeppelin site.yaml with stored overrides and apply it."""
     bigtop = Bigtop()
     overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                        strip=True)
     bigtop.render_site_yaml(roles=['zeppelin-server'], overrides=overrides)
     bigtop.trigger_puppet()
     # Wait for the Zeppelin API to come up before returning.
     self.wait_for_api(30)
Example #14
0
    def install(self, hbase=None, zk_units=None):
        '''
        Trigger the Bigtop puppet recipe that handles the Hive service.

        :param dict hbase: optional hbase connection info ('host',
            'master_port', 'zk_connect'); enables the hive-hbase role.
        :param list zk_units: optional zookeeper units; enables hive
            concurrency support.
        '''
        # Dirs are handled by the bigtop deb. No need to call out to
        # dist_config to do that. We do want 'ubuntu' in the hive group though.
        self.dist_config.add_users()

        # Prep config
        roles = ['hive-client', 'hive-metastore', 'hive-server2']
        metastore = "thrift://{}:9083".format(hookenv.unit_private_ip())

        hb_connect = ""
        zk_hbase_connect = ""
        if hbase:
            roles.append('hive-hbase')
            hb_connect = "{}:{}".format(hbase['host'], hbase['master_port'])
            zk_hbase_connect = hbase['zk_connect']

        hive_support_concurrency = bool(zk_units)
        zk_hive_connect = self.get_zk_connect(zk_units) if zk_units else ""

        override = {
            'hadoop_hive::common_config::hbase_master': hb_connect,
            'hadoop_hive::common_config::hbase_zookeeper_quorum':
                zk_hbase_connect,
            'hadoop_hive::common_config::hive_zookeeper_quorum':
                zk_hive_connect,
            'hadoop_hive::common_config::hive_support_concurrency':
                hive_support_concurrency,
            'hadoop_hive::common_config::metastore_uris': metastore,
            'hadoop_hive::common_config::server2_thrift_port':
                self.dist_config.port('hive-thrift'),
            'hadoop_hive::common_config::server2_thrift_http_port':
                self.dist_config.port('hive-thrift-web'),
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()

        # Bigtop doesn't create a hive-env.sh, but we need it for heap config
        conf_dir = self.dist_config.path('hive_conf')
        hive_env = conf_dir / 'hive-env.sh'
        if not hive_env.exists():
            (conf_dir / 'hive-env.sh.template').copy(hive_env)
Example #15
0
 def trigger_bigtop(self):
     """Render the Zeppelin site.yaml with stored overrides and apply it.

     Waits for the Zeppelin API afterwards (30 is presumably a timeout —
     see wait_for_api).
     """
     bigtop = Bigtop()
     overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                        strip=True)
     bigtop.render_site_yaml(
         roles=[
             'zeppelin-server',
         ],
         overrides=overrides,
     )
     bigtop.trigger_puppet()
     self.wait_for_api(30)
Example #16
0
def install_hadoop_client_yarn(principal, namenode, resourcemanager):
    """Install the yarn plugin once both master FQDNs are available."""
    nn_hosts = namenode.namenodes()
    rm_hosts = resourcemanager.resourcemanagers()
    if not (nn_hosts and rm_hosts):
        hookenv.status_set('waiting', 'waiting for master fqdns')
        return

    hookenv.status_set('maintenance', 'installing plugin (yarn)')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': nn_hosts[0], 'resourcemanager': rm_hosts[0]},
        roles='hadoop-client',
    )
    bigtop.trigger_puppet()
    set_state('apache-bigtop-plugin.yarn.installed')
    hookenv.status_set('maintenance', 'plugin (yarn) installed')
def install_hadoop_client_yarn(principal, namenode, resourcemanager):
    """Install the yarn plugin once both master FQDNs are available."""
    if namenode.namenodes() and resourcemanager.resourcemanagers():
        hookenv.status_set('maintenance', 'installing plugin (yarn)')
        nn_host = namenode.namenodes()[0]
        rm_host = resourcemanager.resourcemanagers()[0]
        bigtop = Bigtop()
        hosts = {'namenode': nn_host, 'resourcemanager': rm_host}
        bigtop.render_site_yaml(hosts=hosts, roles='hadoop-client')
        bigtop.trigger_puppet()
        set_state('apache-bigtop-plugin.yarn.installed')
        hookenv.status_set('maintenance', 'plugin (yarn) installed')
    else:
        hookenv.status_set('waiting', 'waiting for master fqdns')
Example #18
0
    def install(self, hbase=None, zk_units=None):
        '''
        Trigger the Bigtop puppet recipe that handles the Hive service.

        :param dict hbase: optional hbase connection info ('host',
            'master_port', 'zk_connect'); when given, the hive-hbase role
            is added and the hbase-related overrides are filled in.
        :param list zk_units: optional zookeeper units; when given, hive
            concurrency support is enabled and the hive ZK quorum is set.
        '''
        # Dirs are handled by the bigtop deb. No need to call out to
        # dist_config to do that. We do want 'ubuntu' in the hive group though.
        self.dist_config.add_users()

        # Prep config
        roles = ['hive-client', 'hive-metastore', 'hive-server2']
        metastore = "thrift://{}:9083".format(hookenv.unit_private_ip())

        if hbase:
            roles.append('hive-hbase')
            hb_connect = "{}:{}".format(hbase['host'], hbase['master_port'])
            zk_hbase_connect = hbase['zk_connect']
        else:
            hb_connect = ""
            zk_hbase_connect = ""

        if zk_units:
            hive_support_concurrency = True
            zk_hive_connect = self.get_zk_connect(zk_units)
        else:
            hive_support_concurrency = False
            zk_hive_connect = ""

        override = {
            'hadoop_hive::common_config::hbase_master': hb_connect,
            'hadoop_hive::common_config::hbase_zookeeper_quorum':
                zk_hbase_connect,
            'hadoop_hive::common_config::hive_zookeeper_quorum':
                zk_hive_connect,
            'hadoop_hive::common_config::hive_support_concurrency':
                hive_support_concurrency,
            'hadoop_hive::common_config::metastore_uris': metastore,
            'hadoop_hive::common_config::server2_thrift_port':
                self.dist_config.port('hive-thrift'),
            'hadoop_hive::common_config::server2_thrift_http_port':
                self.dist_config.port('hive-thrift-web'),
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()

        # Bigtop doesn't create a hive-env.sh, but we need it for heap config
        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') / 'hive-env.sh.template').copy(
                hive_env)
Example #19
0
def install_mahout():
    """Install mahout via the Bigtop puppet recipe and export MAHOUT_HOME."""
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'mahout-client',
        ],
    )
    bigtop.trigger_puppet()
    # Export MAHOUT_HOME system-wide.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    hookenv.status_set('active', 'ready')
    set_state('mahout.installed')
    def configure(self, hosts, zk_units):
        """Render the hbase site.yaml and apply it with puppet."""
        zk_connect = self.get_zk_connect(zk_units)
        override = {
            'bigtop::hbase_thrift_port': self.dist_config.port('hbase-thrift'),
            'hadoop_hbase::client::thrift': True,
            'hadoop_hbase::common_config::heap_size': hookenv.config()['heap'],
            'hadoop_hbase::common_config::zookeeper_quorum': zk_connect,
            'hadoop_hbase::deploy::auxiliary': False,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(
            hosts,
            ['hbase-server', 'hbase-master', 'hbase-client'],
            override,
        )
        bigtop.trigger_puppet()
Example #21
0
    def install(self, nodes=None):
        '''
        Write out the config, then run puppet.

        After this runs, we should have a configured and running service.

        :param nodes: unused here; accepted for interface compatibility.
        '''
        bigtop = Bigtop()
        # NB: removed a stray '' that was corrupting this log message.
        log("Rendering site yaml with overrides: {}".format(self._override))
        bigtop.render_site_yaml(self._hosts, self._roles, self._override)
        bigtop.trigger_puppet()
        # If we are the ZK leader, publish that on the peer relation.
        if self.is_zk_leader():
            zkpeer = RelationBase.from_state('zkpeer.joined')
            zkpeer.set_zk_leader()
Example #22
0
    def install(self, nodes=None):
        '''
        Write out the config, then run puppet.

        After this runs, we should have a configured and running service.

        :param nodes: unused here; accepted for interface compatibility.
        '''
        bigtop = Bigtop()
        # NB: removed a stray '' that was corrupting this log message.
        log("Rendering site yaml with overrides: {}".format(self._override))
        bigtop.render_site_yaml(self._hosts, self._roles, self._override)
        bigtop.trigger_puppet()
        # If we are the ZK leader, publish that on the peer relation.
        if self.is_zk_leader():
            zkpeer = RelationBase.from_state('zkpeer.joined')
            zkpeer.set_zk_leader()
Example #23
0
def install_mahout():
    """Install mahout, export MAHOUT_HOME, and report the package version."""
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['mahout-client'])
    bigtop.trigger_puppet()

    # Export MAHOUT_HOME system-wide.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    set_state('mahout.installed')
    hookenv.status_set('active', 'ready')
    # set app version string for juju status output
    hookenv.application_version_set(get_package_version('mahout') or 'unknown')
Example #24
0
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.

        Renders site.yaml for the zeppelin-server role with any overrides
        stored under 'zeppelin.bigtop.overrides.' in the unit KV, applies it
        with puppet, then waits for the Zeppelin API to respond.
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)
        bigtop.render_site_yaml(
            roles=[
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        bigtop.trigger_puppet()
        self.wait_for_api(30)
Example #25
0
def install_mahout():
    """Install mahout, export MAHOUT_HOME, and report the package version."""
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'mahout-client',
        ],
    )
    bigtop.trigger_puppet()
    # Export MAHOUT_HOME system-wide.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    set_state('mahout.installed')
    hookenv.status_set('active', 'ready')
    # set app version string for juju status output
    mahout_version = get_package_version('mahout') or 'unknown'
    hookenv.application_version_set(mahout_version)
Example #26
0
    def configure(self, hosts, zk_units):
        """Point HBase at the connected zookeeper quorum and apply puppet."""
        # Sorted, comma-separated list of resolved ZK unit addresses.
        quorum = ",".join(sorted(
            utils.resolve_private_address(unit['host']) for unit in zk_units))

        override = {
            'hadoop_hbase::common_config::zookeeper_quorum': quorum,
            'hadoop_hbase::deploy::auxiliary': False
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(
            hosts,
            ['hbase-server', 'hbase-master', 'hbase-client'],
            override,
        )
        bigtop.trigger_puppet()
Example #27
0
def install_hadoop_client_hdfs(principal, namenode):
    """Install the plugin once the namenode has sent its FQDN.

    Only the namenode FQDN is needed for the plugin install, so poll
    namenodes() data whenever a namenode relation exists. This allows
    installation as early as possible, even before 'namenode.ready' is set.
    """
    nn_hosts = namenode.namenodes()
    if not nn_hosts:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing plugin (hdfs)')
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts={'namenode': nn_hosts[0]},
                            roles='hadoop-client')
    bigtop.trigger_puppet()
    set_state('apache-bigtop-plugin.hdfs.installed')
    hookenv.status_set('maintenance', 'plugin (hdfs) installed')
Example #28
0
def install_hadoop_client_hdfs(principal, namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the plugin install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us to
    install asap, even if 'namenode.ready' is not set yet.
    """
    if namenode.namenodes():
        hookenv.status_set('maintenance', 'installing plugin (hdfs)')
        # The first reported namenode FQDN is sufficient for the install.
        nn_host = namenode.namenodes()[0]
        bigtop = Bigtop()
        hosts = {'namenode': nn_host}
        bigtop.render_site_yaml(hosts=hosts, roles='hadoop-client')
        bigtop.trigger_puppet()
        set_state('apache-bigtop-plugin.hdfs.installed')
        hookenv.status_set('maintenance', 'plugin (hdfs) installed')
    else:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
Example #29
0
    def configure(self, hosts, zk_units):
        """Point HBase at the connected zookeeper quorum and apply puppet.

        :param hosts: host map passed through to render_site_yaml.
        :param list zk_units: zookeeper units; each is a dict with a 'host'
            key whose value is resolved to a private address.
        """
        # Build a sorted, comma-separated quorum string from the ZK units.
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit["host"])
            zks.append(ip)
        zks.sort()
        zk_connect = ",".join(zks)

        roles = ["hbase-server", "hbase-master", "hbase-client"]

        override = {
            "hadoop_hbase::common_config::zookeeper_quorum": zk_connect,
            "hadoop_hbase::deploy::auxiliary": False,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
Example #30
0
def install_giraph(giraph):
    """Install giraph when prerequisite states are present."""
    hookenv.status_set('maintenance', 'installing giraph')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'giraph-client',
        ],
    )
    bigtop.trigger_puppet()

    # Put down the -doc subpackage so we get giraph-examples
    fetch.apt_install('giraph-doc')

    giraph_home = Path('/usr/lib/giraph')
    giraph_docdir = Path('/usr/share/doc/giraph')
    giraph_libdir = Path(giraph_home / 'lib')
    # The examples jar ships in the doc dir (from giraph-doc above).
    giraph_examples = glob('{}/giraph-examples-*.jar'.format(giraph_docdir))

    # Gather a list of all the giraph jars (needed for -libjars)
    giraph_jars = giraph_examples
    giraph_jars.extend(get_good_jars(giraph_home, prefix=True))
    giraph_jars.extend(get_good_jars(giraph_libdir, prefix=True))

    # Update environment with appropriate giraph bits. HADOOP_CLASSPATH can
    # use wildcards (and it should for readability), but GIRAPH_JARS, which
    # is intended to be used as 'hadoop jar -libjars $GIRAPH_JARS', needs to
    # be a comma-separate list of jars.
    with utils.environment_edit_in_place('/etc/environment') as env:
        cur_cp = env['HADOOP_CLASSPATH'] if 'HADOOP_CLASSPATH' in env else ""
        env['GIRAPH_HOME'] = giraph_home
        env['HADOOP_CLASSPATH'] = "{examples}/*:{home}/*:{libs}/*:{cp}".format(
            examples=giraph_docdir,
            home=giraph_home,
            libs=giraph_libdir,
            cp=cur_cp
        )
        env['GIRAPH_JARS'] = ','.join(j for j in giraph_jars)

    set_state('giraph.installed')
    report_status()
    # set app version string for juju status output
    giraph_version = get_package_version('giraph') or 'unknown'
    hookenv.application_version_set(giraph_version)
Example #31
0
def install_giraph(giraph):
    """Install giraph when prerequisite states are present."""
    hookenv.status_set('maintenance', 'installing giraph')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=[
        'giraph-client',
    ], )
    bigtop.trigger_puppet()

    # Put down the -doc subpackage so we get giraph-examples
    fetch.apt_install('giraph-doc')

    giraph_home = Path('/usr/lib/giraph')
    giraph_docdir = Path('/usr/share/doc/giraph')
    giraph_libdir = Path(giraph_home / 'lib')
    # The examples jar ships in the doc dir (from giraph-doc above).
    giraph_examples = glob('{}/giraph-examples-*.jar'.format(giraph_docdir))

    # Gather a list of all the giraph jars (needed for -libjars)
    giraph_jars = giraph_examples
    giraph_jars.extend(get_good_jars(giraph_home, prefix=True))
    giraph_jars.extend(get_good_jars(giraph_libdir, prefix=True))

    # Update environment with appropriate giraph bits. HADOOP_CLASSPATH can
    # use wildcards (and it should for readability), but GIRAPH_JARS, which
    # is intended to be used as 'hadoop jar -libjars $GIRAPH_JARS', needs to
    # be a comma-separate list of jars.
    with utils.environment_edit_in_place('/etc/environment') as env:
        cur_cp = env['HADOOP_CLASSPATH'] if 'HADOOP_CLASSPATH' in env else ""
        env['GIRAPH_HOME'] = giraph_home
        env['HADOOP_CLASSPATH'] = "{examples}/*:{home}/*:{libs}/*:{cp}".format(
            examples=giraph_docdir,
            home=giraph_home,
            libs=giraph_libdir,
            cp=cur_cp)
        env['GIRAPH_JARS'] = ','.join(j for j in giraph_jars)

    set_state('giraph.installed')
    report_status()
    # set app version string for juju status output
    giraph_version = get_package_version('giraph') or 'unknown'
    hookenv.application_version_set(giraph_version)
Example #32
0
def install_namenode():
    """Install and configure the HDFS NameNode via the Bigtop puppet recipes.

    Binds NN interfaces to all addresses, records our host info in the KV
    store for relation readiness, and creates the required users/groups.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    # NB: We want the NN to listen on all interfaces, so bind to 0.0.0.0.
    nn_overrides = {
        'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
    }
    bigtop.render_site_yaml(
        hosts={'namenode': get_fqdn()},
        roles=['namenode', 'mapred-app'],
        overrides=nn_overrides,
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'mapred' and 'spark' user/group since we may not
    # be installing hadoop-mapreduce or spark on this machine. This is needed
    # so the namenode can access yarn and spark job history files in hdfs. Also
    # add our ubuntu user to the hadoop, mapred, and spark groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Example #33
0
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        # Pull any persisted site overrides from the KV store.
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        zeppelin_roles = [
            'spark-client',
            'spark-yarn-slave',
            'zeppelin-server',
        ]

        bigtop = Bigtop()
        bigtop.render_site_yaml(
            roles=zeppelin_roles,
            overrides=overrides,
        )
        bigtop.trigger_puppet()

        self.wait_for_api(30)
Example #34
0
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        bigtop = Bigtop()
        # Pull persisted site overrides; keys are stored under the
        # 'zeppelin.bigtop.overrides.' prefix and returned with it stripped.
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        bigtop.render_site_yaml(
            roles=[
                'spark-client',
                'spark-yarn-slave',
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        bigtop.trigger_puppet()
        # Wait for the Zeppelin API to come back up (30 presumably means
        # seconds — confirm against wait_for_api's signature).
        self.wait_for_api(30)
Example #35
0
def install_namenode():
    """Install the HDFS NameNode with Bigtop puppet, bound to all interfaces.

    Also records NN host info in the KV store and sets up the users/groups
    that the namenode needs for yarn job history access.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    nn_hosts = {'namenode': get_fqdn()}
    nn_roles = [
        'namenode',
        'mapred-app',
    ]
    # NB: We want the NN to listen on all interfaces, so bind to 0.0.0.0.
    nn_overrides = {
        'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
    }
    bigtop.render_site_yaml(hosts=nn_hosts, roles=nn_roles,
                            overrides=nn_overrides)
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def install_datanode(namenode):
    """
    Install (or reconfigure) the datanode once the namenode FQDN is known.

    Only the namenode FQDN is needed for the datanode install, so we poll
    namenodes() data whenever we have a namenode relation. This allows us
    to install asap, even if 'namenode.ready' is not set yet.
    """
    namenodes = namenode.namenodes()
    if not namenodes:
        return
    if not data_changed('datanode.namenodes', namenodes):
        return

    installed = is_state('apache-bigtop-datanode.installed')
    action = 'configuring' if installed else 'installing'
    hookenv.status_set('maintenance', '%s datanode' % action)

    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': namenodes[0]},
        roles=['datanode'],
    )
    # Queue (rather than trigger) puppet; a later handler applies it.
    bigtop.queue_puppet()
    set_state('apache-bigtop-datanode.pending')
Example #37
0
def install_namenode():
    """Install the NameNode via Bigtop and open its bind hosts.

    This variant edits hdfs-site.xml directly (instead of puppet overrides)
    to make the NN listen on all interfaces.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': get_fqdn()},
        roles=['namenode', 'mapred-app'],
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    bind_keys = (
        'dfs.namenode.rpc-bind-host',
        'dfs.namenode.servicerpc-bind-host',
        'dfs.namenode.http-bind-host',
        'dfs.namenode.https-bind-host',
    )
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        for key in bind_keys:
            props[key] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Example #38
0
    def configure(self, available_hosts, zk_units, peers):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is there
          * Resource manager exists aka YARN is ready

        both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: Zookeeper unit dicts with 'host'/'port' keys
            (presumably — confirm against the zookeeper interface).
        :param list peers: Spark peer units; persisted to the KV store.
        """
        # Persist relation data so other handlers can reconstruct state.
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # One-time bootstrap on first configure.
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        hosts = {
            'spark': master_ip,
        }

        dc = self.dist_config
        # Default to local event logging; switch to HDFS when a namenode
        # is available.
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        roles = self.get_roles()

        override = {
            'spark::common::master_url': self.get_master_url(master_ip),
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }

        # Build the "ip:port,ip:port" connection string; empty string when
        # there are no zookeepers.
        # NOTE(review): unlike configure_kafka elsewhere in this codebase,
        # zks is not sorted here, so the string depends on relation order —
        # confirm whether that is intentional.
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = ""

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
        # There is a race condition here.
        # The work role will not start the first time we trigger puppet apply.
        # The exception in /var/logs/spark:
        # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
        # The master url is not set at the time the worker start the first time.
        # TODO(kjackal): ...do the needed... (investiate,debug,submit patch)
        bigtop.trigger_puppet()
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip)

        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = self.get_master_url(master_ip)
        # Install SB (subsequent calls will reconfigure existing install)
        self.install_benchmark()
Example #39
0
def install_resourcemanager(namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the RM install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us to
    install asap, even if 'namenode.ready' is not set yet.
    """
    if not namenode.namenodes():
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing resourcemanager')

    # Hosts
    nn_host = namenode.namenodes()[0]
    rm_host = get_fqdn()

    # Ports
    rm_ipc = get_layer_opts().port('resourcemanager')
    rm_http = get_layer_opts().port('rm_webapp_http')
    jh_ipc = get_layer_opts().port('jobhistory')
    jh_http = get_layer_opts().port('jh_webapp_http')
    hdfs_port = namenode.port()
    webhdfs_port = namenode.webhdfs_port()

    # NB: When we colocate the NN and RM, the RM will run puppet apply
    # last. To ensure we don't lose any hdfs-site.xml data set by the
    # NN, override common_hdfs properties again here.
    rm_overrides = {
        'hadoop::common_yarn::hadoop_rm_port': rm_ipc,
        'hadoop::common_yarn::hadoop_rm_webapp_port': rm_http,
        'hadoop::common_yarn::hadoop_rm_bind_host': '0.0.0.0',
        'hadoop::common_mapred_app::mapreduce_jobhistory_host': '0.0.0.0',
        'hadoop::common_mapred_app::mapreduce_jobhistory_port': jh_ipc,
        'hadoop::common_mapred_app::mapreduce_jobhistory_webapp_port': jh_http,
        'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': nn_host, 'resourcemanager': rm_host},
        roles=['resourcemanager'],
        overrides=rm_overrides,
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
    # to signify RM's readiness. Set our RM info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'spark' user/group since we may not be
    # installing spark on this machine. This is needed so the history
    # server can access spark job history files in hdfs. Also add our
    # ubuntu user to the hadoop, mapred, and spark groups on this machine.
    get_layer_opts().add_users()

    set_state('apache-bigtop-resourcemanager.installed')
    hookenv.status_set('maintenance', 'resourcemanager installed')
class TestBigtopUnit(Harness):
    '''
    Unit tests for Bigtop class.

    '''

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def setUp(self, mock_ver, mock_hookenv):
        """Build a Bigtop instance with hookenv and bigtop_version mocked."""
        # Pin the version property so any release-dependent init code sees
        # a fixed value.
        mock_ver.return_value = '1.2.0'
        super(TestBigtopUnit, self).setUp()
        self.bigtop = Bigtop()

    def test_init(self):
        '''
        A freshly-constructed Bigtop object should expose its filesystem
        locations (bigtop_base, site_yaml) as Path instances.

        '''
        for attr_name in ('bigtop_base', 'site_yaml'):
            self.assertEqual(type(getattr(self.bigtop, attr_name)), Path)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_puppet_modules')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_reverse_dns')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_localdomain')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_java')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_swap')
    @mock.patch('charms.layer.apache_bigtop_base.is_container')
    def test_install(self, mock_container, mock_swap, mock_java, mock_pin,
                     mock_local, mock_dns, mock_fetch, mock_puppet, mock_apply,
                     mock_hiera):
        '''
        Verify install calls expected class methods.

        '''
        mock_container.return_value = False
        self.bigtop.install()
        # Every install step should have been invoked exactly as mocked.
        expected_calls = (mock_swap, mock_java, mock_pin, mock_local,
                          mock_dns, mock_fetch, mock_puppet, mock_apply,
                          mock_hiera)
        for mocked_step in expected_calls:
            self.assertTrue(mocked_step.called)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    def test_refresh_bigtop_release(self, mock_pin, mock_fetch, mock_apply,
                                    mock_update):
        '''
        Verify refresh calls expected class methods.

        '''
        self.bigtop.refresh_bigtop_release()
        for mocked_step in (mock_pin, mock_fetch, mock_apply, mock_update):
            self.assertTrue(mocked_step.called)

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_get_repo_url(self, mock_ver, mock_lsb_release,
                          mock_utils, mock_hookenv):
        '''
        Verify that we setup an appropriate repository.

        Walks through version/distro/arch permutations and checks the URL
        get_repo_url builds for each.
        '''
        mock_ver.return_value = '1.1.0'

        # non-ubuntu should throw an exception
        mock_lsb_release.return_value = {'DISTRIB_ID': 'centos'}
        self.assertRaises(
            BigtopError,
            self.bigtop.get_repo_url,
            '1.1.0')

        # 1.1.0 on trusty/non-power
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/trusty/foo'))

        # 1.1.0 on trusty/power (should return vivid url)
        mock_utils.cpu_arch.return_value = 'ppc64le'
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/vivid/ppc64el'))

        # 1.2.0 on xenial
        mock_ver.return_value = '1.2.0'
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.2.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.2.0/ubuntu/16.04/foo'))

        # 1.2.1 on xenial/intel
        # NOTE(review): setting return_value on the hookenv *module* mock
        # looks inert (nothing here calls mock_hookenv()) — confirm intent.
        mock_hookenv.return_value = {'name': 'foo'}
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('http://repos.bigtop.apache.org/releases/'
                          '1.2.1/ubuntu/16.04/x86_64'))

        # 1.2.1 on xenial/non-intel
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-1.2.1/'
                          'OS=ubuntu-16.04/lastSuccessfulBuild/artifact/output/apt'))

        # master on xenial/intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('master'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
                          'OS=ubuntu-16.04,label=docker-slave/ws/output/apt'))

        # master on xenial/non-intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(self.bigtop.get_repo_url('master'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
                          'OS=ubuntu-16.04-foo,label=docker-slave/ws/output/apt'))

        # test bad version on xenial should throw an exception
        self.assertRaises(
            BigtopError,
            self.bigtop.get_repo_url,
            '0.0.0')

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_install_swap_when_swap_exists(self, mock_sub):
        '''
        Verify we do not attempt to install swap space if it already exists.

        '''
        # Two lines of `swapon -s`-style output means swap is present.
        mock_sub.check_output.return_value = b"foo\nbar"
        mock_sub.reset_mock()
        self.bigtop.install_swap()

        # We reset the mock, so here we're verifying no other subprocess
        # calls were made.
        mock_sub.check_call.assert_not_called()

    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.fetch')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_install_java(self, mock_options, mock_fetch,
                          mock_utils, mock_lsb_release):
        '''
        Test to verify that we install java when requested.

        Covers three scenarios: no install_java option set (noop), option
        set on xenial (apt install, no extra source), and option set on
        trusty (extra PPA added for Java 8).
        '''
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial'}

        # Should be noop if install_java layer opt is not set.
        self.bigtop.options.get.return_value = ''
        self.bigtop.install_java()

        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertFalse(mock_fetch.apt_install.called)
        self.assertFalse(mock_utils.re_edit_in_place.called)

        # Should apt install if install_java layer opt is set.
        self.bigtop.options.get.return_value = 'foo'
        print("options: {}".format(self.bigtop.options))
        self.bigtop.install_java()

        # On xenial, no extra apt source is needed.
        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertTrue(mock_fetch.apt_install.called)
        self.assertTrue(mock_utils.re_edit_in_place.called)

        # On trusty, should add a ppa so that we can install Java 8.
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'trusty'}
        self.bigtop.install_java()
        self.assertTrue(mock_fetch.add_source.called)
        self.assertTrue(mock_fetch.apt_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_pin_bigtop_packages(self, mock_path):
        '''
        Pinning should render the apt preferences template and write it to
        the (mocked) destination file.

        '''
        dst_file = mock.Mock()
        mock_path.return_value = dst_file

        self.bigtop.pin_bigtop_packages(priority=100)
        self.assertTrue(dst_file.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_apt',
                new_callable=mock.PropertyMock)
    def test_update_bigtop_repo(self, mock_apt, mock_lsb_release, mock_sub):
        '''
        Verify a bigtop apt repository is added/removed.

        '''
        # non-ubuntu should not invoke a subprocess call
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'foo',
                                         'DISTRIB_ID': 'centos',
                                         'DISTRIB_RELEASE': '7'}
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_not_called()

        # verify args when adding a repo on ubuntu
        mock_apt.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial',
                                         'DISTRIB_ID': 'ubuntu',
                                         'DISTRIB_RELEASE': '16.04'}
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yu', 'deb foo bigtop contrib'])

        # verify args when removing a repo on ubuntu ('-r' flag added)
        self.bigtop.update_bigtop_repo(remove=True)
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yur', 'deb foo bigtop contrib'])

        # verify we handle check_call errors: the mocked subprocess module
        # needs a CalledProcessError type for the code under test to catch.
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # The call below should swallow the raised error rather than fail.
        mock_sub.check_call.side_effect = mock_raise
        self.bigtop.update_bigtop_repo()

    @mock.patch('charms.layer.apache_bigtop_base.get_package_version')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess.Popen')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    def test_check_bigtop_repo_package(self, mock_lsb_release, mock_sub,
                                       mock_hookenv, mock_pkg_ver):
        '''
        Verify bigtop repo package queries.

        '''
        # non-ubuntu should raise an error
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'foo',
                                         'DISTRIB_ID': 'centos',
                                         'DISTRIB_RELEASE': '7'}
        self.assertRaises(BigtopError,
                          self.bigtop.check_bigtop_repo_package,
                          'foo')

        # reset with ubuntu
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial',
                                         'DISTRIB_ID': 'ubuntu',
                                         'DISTRIB_RELEASE': '16.04'}

        madison_proc = mock.Mock()
        grep_proc = mock.Mock()

        # simulate a missing repo pkg (grep produces no stdout)
        grep_attrs = {'communicate.return_value': (b'', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing repo pkg (message should be logged)
        # NOTE(review): the first assignment below is immediately
        # overwritten, so every Popen call returns grep_proc; this pattern
        # repeats in each scenario. Probably intended
        # side_effect=[madison_proc, grep_proc] — confirm.
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_called_once()
        mock_hookenv.reset_mock()

        # reset our grep args to simulate the repo pkg being found
        grep_attrs = {'communicate.return_value': (b'pkg|1|repo', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo and installed pkg versions are the same (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '1'
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo pkg is newer than installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '0'
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

    @mock.patch('charms.layer.apache_bigtop_base.socket')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_check_reverse_dns(self, mock_hookenv, mock_utils,
                               mock_sub, mock_socket):
        '''
        Verify that we set the reverse_dns_ok state, and handle errors
        correctly.

        '''
        # Test the case where things succeed.
        mock_sub.check_output.return_value = b'domain'
        self.bigtop.check_reverse_dns()
        self.assertTrue(unitdata.kv().get('reverse_dns_ok'))

        # A 'localdomain' result means reverse DNS is not usable.
        mock_sub.check_output.return_value = b'localdomain'
        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

        # Stand-in for socket.herror so the code under test can catch it.
        class MockHError(Exception):
            pass

        def raise_herror(*args, **kwargs):
            raise MockHError('test')
        mock_socket.herror = MockHError
        mock_socket.gethostbyaddr = raise_herror

        # A reverse-lookup failure should also leave the state unset.
        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_fetch_bigtop_release(self, mock_path, mock_hookenv, mock_ver):
        '''Verify we raise an exception if an invalid release is specified.'''
        mock_ver.return_value = 'foo'
        mock_hookenv.resource_get.return_value = False
        with self.assertRaises(BigtopError):
            self.bigtop.fetch_bigtop_release()

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_install_puppet_modules(self, mock_hookenv, mock_utils):
        '''Verify that we seem to install puppet modules correctly.'''
        mock_hookenv.charm_dir.return_value = '/tmp'

        # Intercept run_as and check every invocation runs as root.
        def check_root_user(user, *args):
            self.assertEqual(user, 'root')

        mock_utils.run_as.side_effect = check_root_user
        self.bigtop.install_puppet_modules()

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.glob')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    def test_apply_patches(self, mock_chdir, mock_glob, mock_utils,
                           mock_hookenv):
        '''
        Verify that we apply patches in the correct order.

        '''
        mock_hookenv.charm_dir.return_value = '/tmp'

        # glob returns unsorted names; the code under test should apply
        # them sorted ('bar', 'baz', 'foo'). We pop from reverse_sorted
        # so each run_as call should see the next-smallest name.
        reverse_sorted = ['foo', 'baz', 'bar']
        mock_glob.return_value = ['foo', 'baz', 'bar']

        def mock_run_as(*args):
            # Last arg is the patch path; first must be the 'root' user.
            patch = args[-1]
            self.assertEqual(args[0], 'root')
            # Verify that we're running on a sorted list.
            self.assertTrue(patch.endswith(reverse_sorted.pop()))

        mock_utils.run_as.side_effect = mock_run_as

        self.bigtop.apply_patches()

    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_base')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_hiera_yaml(self, mock_path, mock_base, mock_yaml):
        '''
        Verify that we attempt to add the values that we expect our hiera
        object, before writing it out to a (mocked) yaml file.

        '''
        # Inspect the dict handed to yaml.dump instead of dumping it.
        def mock_dump(hiera_yaml, *args, **kwargs):
            self.assertTrue(hiera_yaml.get(':yaml'))
            self.assertTrue(':datadir' in hiera_yaml[':yaml'])

        mock_yaml.dump.side_effect = mock_dump

        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst
        # Loaded hiera config behaves like an empty mapping with defaults.
        mock_yaml.load.return_value = defaultdict(lambda: {})
        # Make path division (both py2 __div__ and py3 __truediv__) return
        # the base mock so chained path math stays on our mock.
        mock_base.__div__.side_effect = lambda rel: mock_base
        mock_base.__truediv__.side_effect = lambda rel: mock_base

        self.bigtop.render_hiera_yaml()

        # Verify that we attempt to write yaml::datadir to hieradata.
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.site_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_site_yaml(self, mock_path, mock_site, mock_yaml, mock_run):
        '''
        Verify that we attempt to put together a plausible site yaml
        config, before writing it out to a (mocked) yaml file.

        '''

        # Setup: the loaded site yaml starts out effectively empty, and the
        # `config` dict drives which roles/overrides/hosts each run passes.
        mock_yaml.load.return_value = defaultdict(lambda: {})
        config = {
            'roles': None,
            'overrides': None,
            'hosts': None
        }

        def verify_yaml(yaml, *args, **kwargs):
            '''
            Verify that the dict we are trying to dump to yaml has the values
            that we expect.

            '''
            self.assertTrue('bigtop::bigtop_repo_uri' in yaml)
            if config['roles'] is None:
                self.assertFalse('bigtop::roles_enabled' in yaml)
            else:
                self.assertTrue('bigtop::roles_enabled' in yaml)
                self.assertTrue('bigtop::roles' in yaml)
                self.assertEqual(
                    yaml['bigtop::roles'],
                    sorted(config['roles'])
                )
            if config['overrides'] is not None:
                for key in config['overrides']:
                    self.assertTrue(yaml.get(key) == config['overrides'][key])

        mock_yaml.dump.side_effect = verify_yaml

        # Test various permutations of arguments passed in.
        for config_set in [
                {'roles': ['foo', 'bar', 'baz']},  # Test roles
                {'overrides': {'foo': 'bar'}}]:  # Test override
            config.update(config_set)

            # Test: verify_yaml fires inside this call via the dump mock.
            self.bigtop.render_site_yaml(
                roles=config['roles'],
                overrides=config['overrides'],
                hosts=config['hosts'])

            # Reset state so the next permutation starts clean.
            mock_yaml.load.return_value = defaultdict(lambda: {})
            config['roles'] = None
            config['overrides'] = None
            config['hosts'] = None

    def test_queue_puppet(self):
        '''Verify that queueing puppet raises the 'puppet queued' state.'''

        self.bigtop.queue_puppet()
        queued = is_state('apache-bigtop-base.puppet_queued')
        self.assertTrue(queued)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_handle_queued_puppet(self, mock_ver, mock_hookenv, mock_trigger):
        '''
        Verify that a queued puppet run is actually triggered, and that the
        queued state is cleared afterward.

        '''
        mock_ver.return_value = '1.2.0'
        set_state('apache-bigtop-base.puppet_queued')

        Bigtop._handle_queued_puppet()

        self.assertTrue(mock_trigger.called)
        self.assertFalse(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.unitdata')
    def test_trigger_puppet(self, mock_unit, mock_chdir, mock_utils):
        '''
        Test to verify that we attempt to trigger puppet correctly.

        '''
        mock_kv = mock.Mock()
        mock_kv.get.return_value = 'foo'
        mock_unit.kv.return_value = mock_kv

        def check_run_as(user, command, *args):
            # Puppet must always be invoked as root.
            self.assertEqual('root', user)
            self.assertEqual('puppet', command)

        mock_utils.run_as.side_effect = check_run_as

        self.bigtop.trigger_puppet()

        self.assertTrue(mock_utils.run_as.called)

        # TODO: verify the Java 1.7 logic.

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    def test_check_hdfs_setup(self, mock_run, mock_sub):
        '''
        Verify that our hdfs setup check accepts 'ubuntu' with any amount of
        surrounding whitespace, rejects anything else, and treats a
        subprocess failure as a negative result.

        '''
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        # Whitespace around 'ubuntu' should not matter.
        for output in ('ubuntu', '   ubuntu  ', 'ubuntu  ', '  ubuntu'):
            mock_run.return_value = output
            self.assertTrue(self.bigtop.check_hdfs_setup())

        # Anything that does not strip down to 'ubuntu' is a failure.
        for output in ('foo', '   ', '', ' bar', 'notubuntu', 'ubuntu not '):
            mock_run.return_value = output
            self.assertFalse(self.bigtop.check_hdfs_setup())

        # A raised CalledProcessError also means the check fails.
        def raise_error(*args, **kwargs):
            raise MockException('foo!')

        mock_run.side_effect = raise_error
        self.assertFalse(self.bigtop.check_hdfs_setup())

    # Intentionally skipped: spec() is simple enough that lint coverage
    # suffices; keep the placeholder so the method is visibly accounted for.
    @unittest.skip('noop')
    def test_spec(self):
        '''Nothing to test that the linter won't handle.'''

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.chownr')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_run_smoke_tests(self, mock_options, mock_ownr, mock_chdir,
                             mock_run, mock_sub):
        '''
        Verify that we attempt to run smoke tests correctly, and handle
        exceptions as expected.

        '''
        mock_options.return_value = {}
        # Returns None if bigtop isn't available.
        remove_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Returns None if we don't pass in a 'smoke_components' arg
        set_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Should return 'success' if all went well.
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            'success'
        )

        # Should return error message if subprocess raised an Exception.
        # Stand-in for subprocess.CalledProcessError; the code under test
        # reads the exception's 'output' attribute for its return value.
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')
        mock_run.side_effect = mock_raise

        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            "test output"
        )

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_reinstall_repo_packages(self, mock_sub, mock_trigger, mock_pin,
                                     mock_path, mock_hiera, mock_update):
        '''
        Verify that we attempt to trigger puppet during a reinstall, and handle
        exceptions as expected.

        '''
        # Stand-in for subprocess.CalledProcessError; the code under test
        # reads the exception's 'output' attribute for its return value.
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # Should return error message if apt-get remove raised an Exception.
        mock_sub.check_call.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.reinstall_repo_packages(remove_pkgs='foo bar-*'),
            "test output"
        )

        # Should call pin twice if trigger puppet fails (once to raise prio,
        # once again to drop it back down)
        mock_trigger.side_effect = mock_raise
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'failed')
        self.assertEqual(mock_pin.call_count, 2)

        # Should return 'success' if all went well.
        mock_trigger.side_effect = None
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'success')

    def test_get_ip_for_interface(self):
        '''
        Test to verify that our get_ip_for_interface method does sensible
        things.

        '''
        # Both a device name and a CIDR should resolve to loopback.
        self.assertEqual('127.0.0.1', self.bigtop.get_ip_for_interface('lo'))
        self.assertEqual(
            '127.0.0.1', self.bigtop.get_ip_for_interface('127.0.0.0/24'))

        # If passed 0.0.0.0, or something similar, the function should
        # treat it as a special case, and return what it was passed.
        for wildcard in ('0.0.0.0', '0.0.0.0/0', '0/0', '::'):
            self.assertEqual(
                wildcard, self.bigtop.get_ip_for_interface(wildcard))

        # Unmatched networks and bogus interface names raise BigtopError.
        for bad in ('2.2.2.0/24', 'foo'):
            with self.assertRaises(BigtopError):
                self.bigtop.get_ip_for_interface(bad)
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(
            available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']
        # In yarn mode, job history lives in HDFS; otherwise it is local.
        if mode.startswith('yarn'):
            spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
        else:
            spark_events = 'file://{}'.format(dc.path('spark_events'))

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                # Percentages only make sense when we own the whole machine;
                # in yarn mode, warn and keep the 1g default.
                hookenv.log(
                    "driver_memory percentage in non-local mode. "
                    "Using 1g default.",
                    level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. "
                    "Using 1g default.",
                    level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Some spark applications look for envars in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
            env['SPARK_HOME'] = dc.path('spark_home')

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url':
            master_url,
            'spark::common::event_log_dir':
            spark_events,
            'spark::common::history_log_dir':
            spark_events,
            'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem':
            driver_mem,
            'spark::common::executor_mem':
            executor_mem,
        }
        if zk_units:
            # Build a sorted-free, comma-joined ip:port connection string
            # from every connected zookeeper unit.
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet.
        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        if unitdata.kv().get('spark.version.repo', False):
            hookenv.log(
                "An upgrade is available and the site.yaml has been "
                "configured. Run the 'reinstall' action to continue.",
                level=hookenv.INFO)
        else:
            bigtop.trigger_puppet()
            self.patch_worker_master_url(master_ip, master_url)

            # Packages don't create the event dir by default. Do it each time
            # spark is (re)installed to ensure location/perms are correct.
            self.configure_events_dir(mode)

        # Handle examples and Spark-Bench. Do this each time this method is
        # called in case we need to act on a new resource or user config.
        self.configure_examples()
        self.configure_sparkbench()
# Example #42
# 0
    def configure(self, available_hosts, zk_units, peers):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is ready
          * Resource manager exists aka YARN is ready

        both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        """
        # Bootstrap spark once per unit lifetime.
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            # With HDFS available, job history moves off the local disk.
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }
        if zk_units:
            # Build a comma-joined ip:port connection string from every
            # connected zookeeper unit.
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()

        # Do this after our puppet bits in case puppet overrides needed perms
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip, master_url)

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                # Percentages only make sense when we own the whole machine;
                # log at WARNING (matching our sibling implementations) and
                # keep the 1g default.
                hookenv.log("driver_memory percentage in non-local mode. Using 1g default.",
                            level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. Using 1g default.",
                            level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Persist the resolved memory settings into spark-env.sh.
        spark_env = '/etc/spark/conf/spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*': 'export SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*': 'export SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
        }, append_non_matches=True)

        # Install SB (subsequent calls will reconfigure existing install)
        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
        self.install_benchmark()
# Example #43
# 0
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']
        # In yarn mode, job history lives in HDFS; otherwise it is local.
        if mode.startswith('yarn'):
            spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
        else:
            spark_events = 'file://{}'.format(dc.path('spark_events'))

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                # Percentages only make sense when we own the whole machine;
                # in yarn mode, warn and keep the 1g default.
                hookenv.log("driver_memory percentage in non-local mode. "
                            "Using 1g default.", level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. "
                            "Using 1g default.", level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Some spark applications look for envars in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
            env['SPARK_HOME'] = dc.path('spark_home')

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': spark_events,
            'spark::common::history_log_dir': spark_events,
            'spark::common::extra_lib_dirs':
                ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem': driver_mem,
            'spark::common::executor_mem': executor_mem,
        }
        if zk_units:
            # Build a comma-joined ip:port connection string from every
            # connected zookeeper unit.
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet.
        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        if unitdata.kv().get('spark.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            bigtop.trigger_puppet()
            self.patch_worker_master_url(master_ip, master_url)

            # Packages don't create the event dir by default. Do it each time
            # spark is (re)installed to ensure location/perms are correct.
            self.configure_events_dir(mode)

        # Handle examples and Spark-Bench. Do this each time this method is
        # called in case we need to act on a new resource or user config.
        self.configure_examples()
        self.configure_sparkbench()
# Example #44
# 0
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Bootstrap spark once per unit lifetime.
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(
            available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                # Percentages only make sense when we own the whole machine;
                # log at WARNING (matching our sibling implementations) and
                # keep the 1g default.
                hookenv.log(
                    "driver_memory percentage in non-local mode. Using 1g default.",
                    level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. Using 1g default.",
                    level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            # With HDFS available, job history moves off the local disk.
            events_log_dir = self.setup_hdfs_logs()
        else:
            # Bigtop includes a default hadoop_head_node if we do not specify
            # any namenode info. To ensure spark standalone doesn't get
            # invalid hadoop config, set our NN to an empty string.
            hosts['namenode'] = ''
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url':
            master_url,
            'spark::common::event_log_dir':
            events_log_dir,
            'spark::common::history_log_dir':
            events_log_dir,
            'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem':
            driver_mem,
            'spark::common::executor_mem':
            executor_mem,
        }
        if zk_units:
            # Build a comma-joined ip:port connection string from every
            # connected zookeeper unit.
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()

        # Do this after our puppet bits in case puppet overrides needed perms
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip, master_url)

        # Install SB (subsequent calls will reconfigure existing install)
        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
        self.install_benchmark()
class TestBigtopUnit(Harness):
    '''
    Unit tests for Bigtop class.

    '''
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def setUp(self, mock_version, mock_hookenv):
        '''Pin the bigtop version and construct the Bigtop under test.'''
        mock_version.return_value = '1.2.0'
        super(TestBigtopUnit, self).setUp()
        self.bigtop = Bigtop()

    def test_init(self):
        '''
        Verify that the Bigtop class can init itself, and that it has some
        of the properties that we expect..

        '''
        # Both filesystem locations should come back as Path objects.
        for attr_name in ('bigtop_base', 'site_yaml'):
            self.assertEqual(type(getattr(self.bigtop, attr_name)), Path)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_puppet_modules'
                )
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_reverse_dns')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_localdomain')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_java')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_swap')
    @mock.patch('charms.layer.apache_bigtop_base.is_container')
    def test_install(self, mock_container, mock_swap, mock_java, mock_pin,
                     mock_local, mock_dns, mock_fetch, mock_puppet, mock_apply,
                     mock_hiera):
        '''
        Verify install calls expected class methods.

        '''
        mock_container.return_value = False
        self.bigtop.install()
        # Every step of the install pipeline should have been invoked.
        for step in (mock_swap, mock_java, mock_pin, mock_local, mock_dns,
                     mock_fetch, mock_puppet, mock_apply, mock_hiera):
            self.assertTrue(step.called)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    def test_refresh_bigtop_release(self, mock_pin, mock_fetch, mock_apply,
                                    mock_update):
        '''
        Verify refresh calls expected class methods.

        '''
        self.bigtop.refresh_bigtop_release()
        # All four refresh steps should have been exercised.
        for step in (mock_pin, mock_fetch, mock_apply, mock_update):
            self.assertTrue(step.called)

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_get_repo_url(self, mock_ver, mock_lsb_release, mock_utils,
                          mock_hookenv):
        '''
        Verify that we setup an appropriate repository.

        Walks through (bigtop version, distro, arch) combinations in
        sequence; mock state carries over between scenarios, so order
        matters.
        '''
        mock_ver.return_value = '1.1.0'

        # non-ubuntu should throw an exception
        mock_lsb_release.return_value = {'DISTRIB_ID': 'centos'}
        self.assertRaises(BigtopError, self.bigtop.get_repo_url, '1.1.0')

        # 1.1.0 on trusty/non-power ('foo' stands in for any non-ppc arch)
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/trusty/foo'))

        # 1.1.0 on trusty/power (should return vivid url)
        mock_utils.cpu_arch.return_value = 'ppc64le'
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/vivid/ppc64el'))

        # 1.2.0 on xenial
        mock_ver.return_value = '1.2.0'
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.2.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.2.0/ubuntu/16.04/foo'))

        # 1.2.1 on xenial/intel
        # NOTE(review): setting return_value on the hookenv module mock
        # looks like a no-op; confirm whether a specific hookenv call was
        # meant to be stubbed here instead.
        mock_hookenv.return_value = {'name': 'foo'}
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('http://repos.bigtop.apache.org/releases/'
                          '1.2.1/ubuntu/16.04/x86_64'))

        # 1.2.1 on xenial/non-intel (falls back to the upstream CI repo)
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(
            self.bigtop.get_repo_url('1.2.1'),
            ('https://ci.bigtop.apache.org/job/Bigtop-1.2.1/'
             'OS=ubuntu-16.04/lastSuccessfulBuild/artifact/output/apt'))

        # master on xenial/intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(
            self.bigtop.get_repo_url('master'),
            ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
             'OS=ubuntu-16.04,label=docker-slave/ws/output/apt'))

        # master on xenial/non-intel (arch is embedded in the OS axis)
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(
            self.bigtop.get_repo_url('master'),
            ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
             'OS=ubuntu-16.04-foo,label=docker-slave/ws/output/apt'))

        # test bad version on xenial should throw an exception
        self.assertRaises(BigtopError, self.bigtop.get_repo_url, '0.0.0')

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_install_swap_when_swap_exists(self, mock_sub):
        '''
        Verify we do not attempt to install swap space if it already exists.

        '''
        # Simulate existing swap: check_output yields non-empty output.
        mock_sub.check_output.return_value = b"foo\nbar"
        mock_sub.reset_mock()
        self.bigtop.install_swap()

        # We reset the mock, so here we're verifying no other subprocess
        # calls were made.
        mock_sub.check_call.assert_not_called()

    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.fetch')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_install_java(self, mock_options, mock_fetch, mock_utils,
                          mock_lsb_release):
        '''
        Test to verify that we install java when requested.

        '''
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial'}

        # Should be noop if bigtop_jdk not set.
        self.bigtop.options.get.return_value = ''
        self.bigtop.install_java()

        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertFalse(mock_fetch.apt_install.called)
        self.assertFalse(mock_utils.re_edit_in_place.called)

        # Should install the jdk package if we have set bigtop_jdk; no
        # extra ppa is needed on xenial.
        self.bigtop.options.get.return_value = 'foo'
        self.bigtop.install_java()

        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertTrue(mock_fetch.apt_install.called)
        self.assertTrue(mock_utils.re_edit_in_place.called)

        # On trusty, should add a ppa so that we can install Java 8.
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'trusty'}
        self.bigtop.install_java()
        self.assertTrue(mock_fetch.add_source.called)
        self.assertTrue(mock_fetch.apt_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_pin_bigtop_packages(self, mock_path):
        '''
        Verify the apt template is opened and written to a (mocked) file.

        '''
        # Any Path() the code constructs resolves to this mock target.
        destination = mock.Mock()
        mock_path.return_value = destination

        self.bigtop.pin_bigtop_packages(priority=100)
        self.assertTrue(destination.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_apt',
                new_callable=mock.PropertyMock)
    def test_update_bigtop_repo(self, mock_apt, mock_lsb_release, mock_sub):
        '''
        Verify a bigtop apt repository is added/removed.

        Scenarios share mock state and run in order: non-ubuntu (no-op),
        add on ubuntu, remove on ubuntu, then a check_call failure.
        '''
        # non-ubuntu should not invoke a subprocess call
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'foo',
            'DISTRIB_ID': 'centos',
            'DISTRIB_RELEASE': '7'
        }
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_not_called()

        # verify args when adding a repo on ubuntu; 'foo' is the mocked
        # bigtop_apt URL that should land in the deb line.
        mock_apt.return_value = 'foo'
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'xenial',
            'DISTRIB_ID': 'ubuntu',
            'DISTRIB_RELEASE': '16.04'
        }
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yu', 'deb foo bigtop contrib'])

        # verify args when removing a repo on ubuntu (the '-r' flag)
        self.bigtop.update_bigtop_repo(remove=True)
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yur', 'deb foo bigtop contrib'])

        # verify we handle check_call errors: the call below must not raise
        class MockException(Exception):
            pass

        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_sub.check_call.side_effect = mock_raise
        self.bigtop.update_bigtop_repo()

    @mock.patch('charms.layer.apache_bigtop_base.get_package_version')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess.Popen')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    def test_check_bigtop_repo_package(self, mock_lsb_release, mock_sub,
                                       mock_hookenv, mock_pkg_ver):
        '''
        Verify bigtop repo package queries.

        Varies the (mocked) repo query output and the installed package
        version to cover the missing / newer / same / older cases.
        '''
        # non-ubuntu should raise an error
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'foo',
            'DISTRIB_ID': 'centos',
            'DISTRIB_RELEASE': '7'
        }
        self.assertRaises(BigtopError, self.bigtop.check_bigtop_repo_package,
                          'foo')

        # reset with ubuntu
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'xenial',
            'DISTRIB_ID': 'ubuntu',
            'DISTRIB_RELEASE': '16.04'
        }

        # NOTE(review): in each scenario below, the second assignment to
        # mock_sub.return_value overwrites the first, so every Popen call
        # returns grep_proc and madison_proc is effectively unused. The
        # assertions only inspect the grep output, so the test still
        # passes — confirm whether per-call mocks (side_effect) were
        # intended.
        madison_proc = mock.Mock()
        grep_proc = mock.Mock()

        # simulate a missing repo pkg (the pipeline finds nothing)
        grep_attrs = {'communicate.return_value': (b'', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing repo pkg (message should be logged)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_called_once()
        mock_hookenv.reset_mock()

        # reset our grep args to simulate the repo pkg being found
        # ('pkg|1|repo' mimics 'apt-cache madison' column output)
        grep_attrs = {'communicate.return_value': (b'pkg|1|repo', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo and installed pkg versions are the same (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '1'
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo pkg is newer than installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '0'
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

    @mock.patch('charms.layer.apache_bigtop_base.socket')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_check_reverse_dns(self, mock_hookenv, mock_utils, mock_sub,
                               mock_socket):
        '''
        Verify that we set the reverse_dns_ok state, and handle errors
        correctly.

        '''
        # Happy path: a real domain means reverse dns is usable.
        mock_sub.check_output.return_value = b'domain'
        self.bigtop.check_reverse_dns()
        self.assertTrue(unitdata.kv().get('reverse_dns_ok'))

        # A 'localdomain' result should clear the flag.
        mock_sub.check_output.return_value = b'localdomain'
        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

        # A socket.herror during lookup should also leave the flag unset.
        class MockHError(Exception):
            pass

        mock_socket.herror = MockHError
        mock_socket.gethostbyaddr = mock.Mock(side_effect=MockHError('test'))

        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_fetch_bigtop_release(self, mock_path, mock_hookenv, mock_ver):
        '''Verify we raise an exception if an invalid release is specified.'''
        # No charm resource attached, and an unrecognized version string.
        mock_ver.return_value = 'foo'
        mock_hookenv.resource_get.return_value = False
        self.assertRaises(BigtopError, self.bigtop.fetch_bigtop_release)

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_install_puppet_modules(self, mock_hookenv, mock_utils):
        '''Verify that we seem to install puppet modules correctly.'''
        mock_hookenv.charm_dir.return_value = '/tmp'

        def check_root(user, *args):
            # Puppet module installation must run as root.
            self.assertEqual(user, 'root')

        mock_utils.run_as.side_effect = check_root
        self.bigtop.install_puppet_modules()

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.glob')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    def test_apply_patches(self, mock_chdir, mock_glob, mock_utils,
                           mock_hookenv):
        '''
        Verify that we apply patches in the correct order.

        '''
        mock_hookenv.charm_dir.return_value = '/tmp'

        # glob returns patches out of order; they must be applied sorted.
        unsorted_patches = ['foo', 'baz', 'bar']
        mock_glob.return_value = list(unsorted_patches)
        expected_order = iter(sorted(unsorted_patches))

        def verify_run(*args):
            # Each patch is applied as root, in sorted filename order.
            self.assertEqual(args[0], 'root')
            self.assertTrue(args[-1].endswith(next(expected_order)))

        mock_utils.run_as.side_effect = verify_run

        self.bigtop.apply_patches()

    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_base')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_hiera_yaml(self, mock_path, mock_base, mock_yaml):
        '''
        Verify that we attempt to add the values that we expect our hiera
        object, before writing it out to a (mocked) yaml file.

        '''
        def mock_dump(hiera_yaml, *args, **kwargs):
            # These assertions fire when render_hiera_yaml calls yaml.dump.
            self.assertTrue(hiera_yaml.get(':yaml'))
            self.assertTrue(':datadir' in hiera_yaml[':yaml'])

        mock_yaml.dump.side_effect = mock_dump

        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst
        mock_yaml.load.return_value = defaultdict(lambda: {})
        # Make '/' joins on the mocked bigtop_base return the mock itself;
        # __div__ presumably covers py2-style division — confirm needed.
        mock_base.__div__.side_effect = lambda rel: mock_base
        mock_base.__truediv__.side_effect = lambda rel: mock_base

        self.bigtop.render_hiera_yaml()

        # Verify that we attempt to write yaml::datadir to hieradata.
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.site_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_site_yaml(self, mock_path, mock_site, mock_yaml, mock_run):
        '''
        Verify that we attempt to put together a plausible site yaml
        config, before writing it out to a (mocked) yaml file.

        '''

        # Setup. 'config' holds the arguments for the current scenario and
        # is read by the verify_yaml closure when yaml.dump fires.
        mock_yaml.load.return_value = defaultdict(lambda: {})
        config = {'roles': None, 'overrides': None, 'hosts': None}

        def verify_yaml(yaml, *args, **kwargs):
            '''
            Verify that the dict we are trying to dump to yaml has the values
            that we expect.

            '''
            self.assertTrue('bigtop::bigtop_repo_uri' in yaml)
            if config['roles'] is None:
                self.assertFalse('bigtop::roles_enabled' in yaml)
            else:
                self.assertTrue('bigtop::roles_enabled' in yaml)
                self.assertTrue('bigtop::roles' in yaml)
                self.assertEqual(yaml['bigtop::roles'],
                                 sorted(config['roles']))
            if config['overrides'] is not None:
                for key in config['overrides']:
                    self.assertTrue(yaml.get(key) == config['overrides'][key])

        mock_yaml.dump.side_effect = verify_yaml

        # Test various permutations of arguments passed in.
        for config_set in [
            {
                'roles': ['foo', 'bar', 'baz']
            },  # Test roles
            {
                'overrides': {
                    'foo': 'bar'
                }
            }
        ]:  # Test override
            config.update(config_set)

            # Test: the real assertions run inside verify_yaml above.
            self.bigtop.render_site_yaml(roles=config['roles'],
                                         overrides=config['overrides'],
                                         hosts=config['hosts'])

            # Reset so the next scenario starts from a clean slate.
            mock_yaml.load.return_value = defaultdict(lambda: {})
            config['roles'] = None
            config['overrides'] = None
            config['hosts'] = None

    def test_queue_puppet(self):
        '''Verify that we set the expected 'puppet queued' state.'''
        self.bigtop.queue_puppet()
        # Queuing should leave the reactive flag set.
        self.assertTrue(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_handle_queued_puppet(self, mock_ver, mock_hookenv, mock_trigger):
        '''
        Verify that we attempt to call puppet when it has been queued, and
        then clear the queued state.

        '''
        mock_ver.return_value = '1.2.0'
        set_state('apache-bigtop-base.puppet_queued')

        Bigtop._handle_queued_puppet()

        # Puppet ran, and the queued flag was cleared afterwards.
        self.assertTrue(mock_trigger.called)
        self.assertFalse(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.unitdata')
    def test_trigger_puppet(self, mock_unit, mock_chdir, mock_utils):
        '''
        Test to verify that we attempt to trigger puppet correctly.

        '''
        kv_store = mock.Mock()
        mock_unit.kv.return_value = kv_store
        kv_store.get.return_value = 'foo'

        def assert_root_puppet(user, puppet, *args):
            # Puppet must be invoked as root, via the 'puppet' command.
            self.assertEqual(user, 'root')
            self.assertEqual(puppet, 'puppet')

        mock_utils.run_as.side_effect = assert_root_puppet

        self.bigtop.trigger_puppet()

        self.assertTrue(mock_utils.run_as.called)

        # TODO: verify the Java 1.7 logic.

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    def test_check_hdfs_setup(self, mock_run, mock_sub):
        '''
        Verify that our hdfs setup check works as expected, and handles
        errors as expected.

        '''
        class MockException(Exception):
            pass

        mock_sub.CalledProcessError = MockException

        def raise_mock(*args, **kwargs):
            raise MockException('foo!')

        # Any whitespace-padded 'ubuntu' output means hdfs is set up.
        for output in ('ubuntu', '   ubuntu  ', 'ubuntu  ', '  ubuntu'):
            mock_run.return_value = output
            self.assertTrue(self.bigtop.check_hdfs_setup())

        # Anything else means it is not.
        for output in ('foo', '   ', '', ' bar', 'notubuntu', 'ubuntu not '):
            mock_run.return_value = output
            self.assertFalse(self.bigtop.check_hdfs_setup())

        # A subprocess error is also treated as 'not set up'.
        mock_run.side_effect = raise_mock
        self.assertFalse(self.bigtop.check_hdfs_setup())

    # Intentionally skipped placeholder: spec() needs no runtime test
    # beyond what the linter already covers.
    @unittest.skip('noop')
    def test_spec(self):
        '''Nothing to test that the linter won't handle.'''

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.chownr')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_run_smoke_tests(self, mock_options, mock_ownr, mock_chdir,
                             mock_run, mock_sub):
        '''
        Verify that we attempt to run smoke tests correctly, and handle
        exceptions as expected.

        '''
        mock_options.return_value = {}
        # Returns None if bigtop isn't available.
        remove_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Returns None if we don't pass in a 'smoke_components' arg
        set_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Should return 'success' if all went well.
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            'success')

        # Should return error message if subprocess raised an Exception.
        # The CalledProcessError stand-in carries an 'output' attribute,
        # which is what run_smoke_tests reports back on failure.
        class MockException(Exception):
            pass

        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_run.side_effect = mock_raise

        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            "test output")

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_reinstall_repo_packages(self, mock_sub, mock_trigger, mock_pin,
                                     mock_path, mock_hiera, mock_update):
        '''
        Verify that we attempt to trigger puppet during a reinstall, and handle
        exceptions as expected.

        '''
        class MockException(Exception):
            pass

        # The error stand-in carries 'output', which is what the method
        # reports back on an apt failure.
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # Should return error message if apt-get remove raised an Exception.
        mock_sub.check_call.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.reinstall_repo_packages(remove_pkgs='foo bar-*'),
            "test output")

        # Should call pin twice if trigger puppet fails (once to raise prio,
        # once again to drop it back down)
        mock_trigger.side_effect = mock_raise
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'failed')
        self.assertEqual(mock_pin.call_count, 2)

        # Should return 'success' if all went well.
        mock_trigger.side_effect = None
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'success')

    def test_get_ip_for_interface(self):
        '''
        Test to verify that our get_ip_for_interface method does sensible
        things.

        '''
        # Loopback may be requested by device name or by CIDR.
        for spec in ('lo', '127.0.0.0/24'):
            self.assertEqual(self.bigtop.get_ip_for_interface(spec),
                             '127.0.0.1')

        # If passed 0.0.0.0, or something similar, the function should
        # treat it as a special case, and return what it was passed.
        for wildcard in ('0.0.0.0', '0.0.0.0/0', '0/0', '::'):
            self.assertEqual(self.bigtop.get_ip_for_interface(wildcard),
                             wildcard)

        # Unmatched networks and bogus interface names should raise.
        for bad in ('2.2.2.0/24', 'foo'):
            self.assertRaises(BigtopError,
                              self.bigtop.get_ip_for_interface, bad)