Example #1
def __init__(self, dist_config=None):
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'flume': 'flume-%s' % utils.cpu_arch(),
     }
     zk_res = 'zookeeper-%s' % utils.cpu_arch()
     if jujuresources.resource_defined(zk_res):
         self.resources['zookeeper'] = zk_res
     self.verify_resources = utils.verify_resources(
         *self.resources.values())
Example #2
 def __init__(self, dist_config=None, user='******'):
     self.user = user
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'flume': 'flume-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
Example #3
    def prepare_bigtop_component(self, hr_conf, hosts={}):
        '''
        :param hosts: dict of {service: fqdn}
        '''
        cluster_components = self.options.get("bigtop_component_list").split()
        java_package_name = self.options.get('java_package_name')
        bigtop_apt = self.options.get('bigtop_repo-{}'.format(utils.cpu_arch()))
        gw_host = subprocess.check_output(['facter', 'fqdn']).strip().decode()
        nn_host = ''
        rm_host = ''
        for k, host in hosts.items():
            if k == 'namenode':
                nn_host = host
            elif k == 'resourcemanager':
                rm_host = host

        yaml_data = {
            'bigtop::hadoop_gateway_node': gw_host,
            'bigtop::hadoop_head_node': nn_host,
            'hadoop::common_yarn::hadoop_rm_host': rm_host,
            'hadoop::hadoop_storage_dirs': ['/data/1', '/data/2'],
            'hadoop_cluster_node::cluster_components': cluster_components,
            'bigtop::jdk_package_name': '{0}'.format(java_package_name),
            'bigtop::bigtop_repo_uri': '{0}'.format(bigtop_apt),
        }

        Path(hr_conf).dirname().makedirs_p()
        with open(hr_conf, 'w+') as fd:
            yaml.dump(yaml_data, fd)
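For illustration, here is a minimal, self-contained sketch of the kind of hieradata file prepare_bigtop_component writes. The hostnames, component list, JDK package, repo URI, and output path are hypothetical placeholders, and plain yaml/os stand in for the charm helpers used above.

    # Standalone sketch (hypothetical values); mirrors the yaml_data written above.
    import os
    import yaml  # PyYAML

    yaml_data = {
        'bigtop::hadoop_gateway_node': 'gateway-0.example.com',
        'bigtop::hadoop_head_node': 'namenode-0.example.com',
        'hadoop::common_yarn::hadoop_rm_host': 'resourcemanager-0.example.com',
        'hadoop::hadoop_storage_dirs': ['/data/1', '/data/2'],
        'hadoop_cluster_node::cluster_components': ['hadoop', 'yarn', 'spark'],
        'bigtop::jdk_package_name': 'openjdk-8-jdk-headless',
        'bigtop::bigtop_repo_uri': 'http://repos.bigtop.apache.org/releases/1.2.1/ubuntu/16.04/x86_64',
    }

    hr_conf = '/tmp/hieradata/site.yaml'  # hypothetical output path
    os.makedirs(os.path.dirname(hr_conf), exist_ok=True)
    with open(hr_conf, 'w') as fd:
        yaml.dump(yaml_data, fd, default_flow_style=False)
    print(open(hr_conf).read())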
Example #4
    def install_benchmark(self):
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if not unitdata.kv().get('spark_bench.installed', False):
                if utils.cpu_arch() == 'ppc64le':
                    sb_url = hookenv.config()['spark_bench_ppc64le']
                else:
                    # TODO: may need more arch cases (go with x86 sb for now)
                    sb_url = hookenv.config()['spark_bench_x86_64']

                Path(sb_dir).rmtree_p()
                au = ArchiveUrlFetchHandler()
                au.install(sb_url, '/home/ubuntu')

                # #####
                # Handle glob if we use a .tgz that doesn't expand to sb_dir
                # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
                # SparkBench expects to live in ~/spark-bench, so put it there
                # Path(sb_archive_dir).rename(sb_dir)
                # #####

                unitdata.kv().set('spark_bench.installed', True)
                unitdata.kv().flush(True)
        else:
            Path(sb_dir).rmtree_p()
            unitdata.kv().set('spark_bench.installed', False)
            unitdata.kv().flush(True)
Example #5
 def __init__(self, dist_config=None):
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'kafka': 'kafka-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(
         *self.resources.values())
Example #6
 def __init__(self, dist_config):
     self.dist_config = dist_config
     self.resources = {
         'spark': 'spark-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(
         *self.resources.values())
Example #7
 def __init__(self, dist_config):
     self.dist_config = dist_config
     self.cpu_arch = utils.cpu_arch()
     self.resources = {
         'hive': 'hive-%s' % self.cpu_arch,
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
Example #8
 def __init__(self, dist_config):
     self.user = '******'
     self.dist_config = dist_config
     self.resources = {
         'livy': 'livy-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
Example #9
 def __init__(self, dist_config):
     self.user = '******'
     self.dist_config = dist_config
     self.resources = {
         'livy': 'livy-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(
         *self.resources.values())
Example #10
 def __init__(self, dist_config=None, user='******'):
     self.user = user
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'flume': 'flume-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(
         *self.resources.values())
Example #11
 def __init__(self, dist_config):
     self.dist_config = dist_config
     self.cpu_arch = utils.cpu_arch()
     self.resources = {
         'hue': 'hue-{}'.format(self.cpu_arch),
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
     self.hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
Example #12
 def __init__(self, dist_config):
     self.dist_config = dist_config
     self.cpu_arch = utils.cpu_arch()
     self.resources = {
         'hue': 'hue-%s' % self.cpu_arch,
     }
     self.verify_resources = utils.verify_resources(
         *self.resources.values())
Example #13
    def __init__(self, hadoop_version, dist_config):
        self.dist_config = dist_config
        self.hadoop_version = hadoop_version
        self.cpu_arch = utils.cpu_arch()

        self.resources = {
            'gobblin': 'gobblin-hadoop_%s-%s' % (hadoop_version, self.cpu_arch),
        }
        self.verify_resources = utils.verify_resources(*self.resources.values())
Example #14
    def __init__(self, hadoop_version, dist_config):
        self.dist_config = dist_config
        self.cpu_arch = utils.cpu_arch()

        self.resources = {
            'gobblin':
            'gobblin-hadoop_%s-%s' % (hadoop_version, self.cpu_arch),
        }
        self.verify_resources = utils.verify_resources(
            *self.resources.values())
Example #15
    def prepare_bigtop_passthrough(self, hr_conf, hiera_params):
        '''
        :param hiera_params: dict of hiera keys/values to pass through
        '''
        java_package_name = self.options.get('java_package_name')
        bigtop_apt = self.options.get('bigtop_repo-{}'.format(utils.cpu_arch()))
        hiera_params['hadoop::hadoop_storage_dirs'] = ['/data/1', '/data/2']
        hiera_params['bigtop::jdk_package_name'] = '{0}'.format(java_package_name)
        hiera_params['bigtop::bigtop_repo_uri'] = '{0}'.format(bigtop_apt)

        Path(hr_conf).dirname().makedirs_p()
        with open(hr_conf, 'w+') as fd:
            yaml.dump(hiera_params, fd)
Example #16
    def prepare_bigtop_config(self, hr_conf, NN=None, RM=None, extra=None):
        '''
        NN: fqdn of the namenode (head node)
        RM: fqdn of the resourcemanager (optional)
        extra: list of extra cluster components
        '''
        # TODO storage dirs should be configurable
        # TODO list of cluster components should be configurable
        cluster_components = ['hadoop']
        # Setting NN (our head node) is required; exit and log if we don't have it
        if NN is None:
            hookenv.log("No NN hostname given for install")
            hookenv.status_set("waiting", "Cannot install without NN")
            sys.exit(1)
        else:
            nn_fqdn = NN
            hookenv.log("Using %s as our hadoop_head_node" % nn_fqdn)

        # If we have an RM, add 'yarn' to the installed components
        if RM is None:
            rm_fqdn = ''
            hookenv.log("No RM hostname given for install")
        else:
            rm_fqdn = RM
            cluster_components.append('yarn')

        # Add anything else the user wanted
        if extra is not None:
            cluster_components.extend(extra)

        java_package_name = self.options.get('java_package_name')
        bigtop_apt = self.options.get('bigtop_repo-{}'.format(utils.cpu_arch()))

        yaml_data = {
            'bigtop::hadoop_head_node': nn_fqdn,
            'hadoop::common_yarn::hadoop_rm_host': rm_fqdn,
            'hadoop::hadoop_storage_dirs': ['/data/1', '/data/2'],
            'hadoop_cluster_node::cluster_components': cluster_components,
            'bigtop::jdk_package_name': '{0}'.format(java_package_name),
            'bigtop::bigtop_repo_uri': '{0}'.format(bigtop_apt),
        }

        Path(hr_conf).dirname().makedirs_p()
        with open(hr_conf, 'w+') as fd:
            yaml.dump(yaml_data, fd)
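The component-selection logic above is small enough to illustrate on its own. A self-contained sketch follows; the helper name select_components and the example FQDNs are illustrative and not part of the charm code.

    # Sketch of the cluster_components selection in prepare_bigtop_config.
    def select_components(NN, RM=None, extra=None):
        if NN is None:
            raise ValueError('cannot install without a namenode (NN)')
        components = ['hadoop']
        if RM is not None:
            components.append('yarn')   # a resourcemanager enables yarn
        if extra is not None:
            components.extend(extra)    # anything else the user wanted
        return components

    print(select_components('nn-0.example.com', RM='rm-0.example.com', extra=['spark']))
    # ['hadoop', 'yarn', 'spark']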
Example #17
    def __init__(self, dist_config):
        self.dist_config = dist_config
        self.charm_config = hookenv.config()
        self.cpu_arch = utils.cpu_arch()
        self.client_spec = {
            'hadoop': self.dist_config.hadoop_version,
        }

        # dist_config will have simple validation done on primary keys in the
        # dist.yaml, but we need to ensure deeper values are present.
        required_dirs = [
            'hadoop', 'hadoop_conf', 'hdfs_log_dir', 'mapred_log_dir',
            'yarn_log_dir'
        ]
        missing_dirs = set(required_dirs) - set(self.dist_config.dirs.keys())
        if missing_dirs:
            raise ValueError(
                'dirs option in {} is missing required entr{}: {}'.format(
                    self.dist_config.yaml_file,
                    'ies' if len(missing_dirs) > 1 else 'y',
                    ', '.join(missing_dirs)))

        # Build a list of hadoop resources needed from resources.yaml
        self.resources = {
            'java-installer': 'java-installer',
            'hadoop': 'hadoop-%s' % (self.cpu_arch),
        }
        hadoop_version = self.dist_config.hadoop_version
        versioned_res = 'hadoop-%s-%s' % (hadoop_version, self.cpu_arch)
        if jujuresources.resource_defined(versioned_res):
            self.resources['hadoop'] = versioned_res

        # LZO compression for hadoop is distributed separately. Add it to the
        # list of reqs if defined in resources.yaml
        lzo_res = 'hadoop-lzo-%s' % self.cpu_arch
        if jujuresources.resource_defined(lzo_res):
            self.resources['lzo'] = lzo_res

        # Verify and fetch the required hadoop resources
        self.verify_resources = utils.verify_resources(
            *self.resources.values())
        self.verify_conditional_resources = self.verify_resources  # for backwards compat
Example #18
    def prepare_bigtop_role(self, hr_conf, hosts={}, roles=None):
        '''
        :param hosts: dict of {service: fqdn}
        :param roles: list of Bigtop role names to enable
        '''
        java_package_name = self.options.get('java_package_name')
        bigtop_apt = self.options.get('bigtop_repo-{}'.format(utils.cpu_arch()))

        nn_host = ''
        rm_host = ''
        spark_host = ''
        zk_host = ''
        zk_quorum = ''
        for k, host in hosts.items():
            if k == 'namenode':
                nn_host = host
            elif k == 'resourcemanager':
                rm_host = host
            elif k == 'spark':
                spark_host = host
            elif k == 'zk':
                zk_host = host
            elif k == 'zk_quorum':
                zk_quorum = host

        yaml_data = {
            'bigtop::hadoop_head_node': nn_host,
            'bigtop::roles_enabled': True,
            'bigtop::roles': roles,
            'hadoop::common_hdfs::hadoop_namenode_host': nn_host,
            'hadoop::common_yarn::hadoop_ps_host': rm_host,
            'hadoop::common_yarn::hadoop_rm_host': rm_host,
            'hadoop::common_mapred_app::jobtracker_host': rm_host,
            'hadoop::common_mapred_app::mapreduce_jobhistory_host': rm_host,
            'hadoop::zk': zk_host,
            'spark::common::master_host': spark_host,
            'hadoop_zookeeper::server::ensemble': zk_quorum,
            'hadoop::hadoop_storage_dirs': ['/data/1', '/data/2'],
            'bigtop::jdk_package_name': '{0}'.format(java_package_name),
            'bigtop::bigtop_repo_uri': '{0}'.format(bigtop_apt),
        }

        Path(hr_conf).dirname().makedirs_p()
        with open(hr_conf, 'w+') as fd:
            yaml.dump(yaml_data, fd)
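As a hedged usage sketch, a call to prepare_bigtop_role might look like the following; the bigtop instance, hieradata path, FQDNs, and role names are placeholders, not values taken from the charm.

    # Hypothetical call illustrating the expected shapes of `hosts` and `roles`.
    bigtop.prepare_bigtop_role(
        '/etc/puppet/hieradata/site.yaml',
        hosts={
            'namenode': 'nn-0.example.com',
            'resourcemanager': 'rm-0.example.com',
            'spark': 'spark-0.example.com',
            'zk': 'zk-0.example.com',
            'zk_quorum': 'zk-0.example.com,zk-1.example.com,zk-2.example.com',
        },
        roles=['namenode', 'resourcemanager'],
    )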
Example #19
    def __init__(self, dist_config):
        self.dist_config = dist_config
        self.charm_config = hookenv.config()
        self.cpu_arch = utils.cpu_arch()
        self.client_spec = {
            'hadoop': self.dist_config.hadoop_version,
        }

        # dist_config will have simple validation done on primary keys in the
        # dist.yaml, but we need to ensure deeper values are present.
        required_dirs = ['hadoop', 'hadoop_conf', 'hdfs_log_dir',
                         'mapred_log_dir', 'yarn_log_dir']
        missing_dirs = set(required_dirs) - set(self.dist_config.dirs.keys())
        if missing_dirs:
            raise ValueError('dirs option in {} is missing required entr{}: {}'.format(
                self.dist_config.yaml_file,
                'ies' if len(missing_dirs) > 1 else 'y',
                ', '.join(missing_dirs)))

        # Build a list of hadoop resources needed from resources.yaml
        self.resources = {
            'java-installer': 'java-installer',
            'hadoop': 'hadoop-%s' % (self.cpu_arch),
        }
        hadoop_version = self.dist_config.hadoop_version
        versioned_res = 'hadoop-%s-%s' % (hadoop_version, self.cpu_arch)
        if jujuresources.resource_defined(versioned_res):
            self.resources['hadoop'] = versioned_res

        # LZO compression for hadoop is distributed separately. Add it to the
        # list of reqs if defined in resources.yaml
        lzo_res = 'hadoop-lzo-%s' % self.cpu_arch
        if jujuresources.resource_defined(lzo_res):
            self.resources['lzo'] = lzo_res

        # Verify and fetch the required hadoop resources
        self.verify_resources = utils.verify_resources(*self.resources.values())
        self.verify_conditional_resources = self.verify_resources  # for backwards compat
Example #20
    def install_benchmark(self):
        """
        Install and configure SparkBench.

        If config[spark_bench_enabled], fetch, install, and configure
        SparkBench on initial invocation. Subsequent invocations will skip the
        fetch/install, but will reconfigure SparkBench since we may need to
        adjust the data dir (eg: benchmark data is stored in hdfs when spark
        is in yarn mode; locally in all other execution modes).
        """
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/SparkBench'
        if install_sb:
            # Fetch/install on our first go-round, then set unit data so we
            # don't reinstall every time this function is called.
            if not unitdata.kv().get('spark_bench.installed', False):
                if utils.cpu_arch() == 'ppc64le':
                    sb_url = hookenv.config()['spark_bench_ppc64le']
                else:
                    # TODO: may need more arch cases (go with x86 sb for now)
                    sb_url = hookenv.config()['spark_bench_x86_64']

                Path(sb_dir).rmtree_p()
                au = ArchiveUrlFetchHandler()
                au.install(sb_url, '/home/ubuntu')

                # NB: This block is unused when using one of our sb tgzs. It
                # may come in handy if people want a tgz that does not expand
                # to our expected sb_dir.
                # #####
                # Handle glob if we use a .tgz that doesn't expand to sb_dir
                # sb_archive_dir = glob('/home/ubuntu/SparkBench*')[0]
                # SparkBench expects to live in ~/SparkBench, so put it there
                # Path(sb_archive_dir).rename(sb_dir)
                # #####

                # Ensure users in the spark group can write to any subdirectory
                # of sb_dir (spark needs to write benchmark output there when
                # running in local modes).
                host.chownr(Path(sb_dir), 'ubuntu', 'spark', chowntopdir=True)
                for r, d, f in os.walk(sb_dir):
                    os.chmod(r, 0o2775)

                unitdata.kv().set('spark_bench.installed', True)
                unitdata.kv().flush(True)

            # Configure the SB env every time this function is called.
            sb_conf = '{}/conf'.format(sb_dir)
            sb_env = Path(sb_conf) / 'env.sh'
            if not sb_env.exists():
                (Path(sb_conf) / 'env.sh.template').copy(sb_env)

            # NB: A few notes on configuring SparkBench:
            # 1. Input data has been pregenerated and packed into the tgz. All
            # spark cluster members will have this data locally, which enables
            # us to execute benchmarks in the absence of HDFS. When spark is in
            # yarn mode, we'll need to generate and store this data in HDFS
            # so nodemanagers can access it (NMs obviously won't have SB
            # installed locally). Set DATA_HDFS to a local dir or common HDFS
            # location depending on our spark execution mode.
            #
            # 2. SB tries to SSH to spark workers to purge vmem caches. This
            # isn't possible in containers, nor is it possible in our env
            # because we don't distribute ssh keys among cluster members.
            # Set MC_LIST to an empty string to prevent this behavior.
            #
            # 3. Throughout SB, HADOOP_HOME/bin is used as the prefix for the
            # hdfs command. Bigtop's hdfs lives at /usr/bin/hdfs, so set the
            # SB HADOOP_HOME accordingly (it's not used for anything else).
            #
            # 4. Use our MASTER envar to set the SparkBench SPARK_MASTER url.
            # It is updated every time we (re)configure spark.
            mode = hookenv.config()['spark_execution_mode']
            if mode.startswith('yarn'):
                sb_data_dir = "hdfs:///user/ubuntu/SparkBench"
            else:
                sb_data_dir = "file://{}".format(sb_dir)

            utils.re_edit_in_place(sb_env, {
                r'^DATA_HDFS *=.*': 'DATA_HDFS="{}"'.format(sb_data_dir),
                r'^DATASET_DIR *=.*': 'DATASET_DIR="{}/dataset"'.format(sb_dir),
                r'^MC_LIST *=.*': 'MC_LIST=""',
                r'.*HADOOP_HOME *=.*': 'HADOOP_HOME="/usr"',
                r'.*SPARK_HOME *=.*': 'SPARK_HOME="/usr/lib/spark"',
                r'^SPARK_MASTER *=.*': 'SPARK_MASTER="$MASTER"',
            })
        else:
            # config[spark_bench_enabled] is false; remove it
            Path(sb_dir).rmtree_p()
            unitdata.kv().set('spark_bench.installed', False)
            unitdata.kv().flush(True)
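A self-contained sketch of the env.sh values chosen above follows; the helper name spark_bench_env and the example execution mode are illustrative, while the directory and path values mirror the substitutions performed by re_edit_in_place.

    # Sketch of the SparkBench env values produced by install_benchmark.
    def spark_bench_env(spark_execution_mode, sb_dir='/home/ubuntu/SparkBench'):
        if spark_execution_mode.startswith('yarn'):
            data_dir = 'hdfs:///user/ubuntu/SparkBench'   # shared location for NodeManagers
        else:
            data_dir = 'file://{}'.format(sb_dir)         # local data in non-yarn modes
        return {
            'DATA_HDFS': data_dir,
            'DATASET_DIR': '{}/dataset'.format(sb_dir),
            'MC_LIST': '',                 # no ssh to workers
            'HADOOP_HOME': '/usr',         # Bigtop's hdfs lives at /usr/bin/hdfs
            'SPARK_HOME': '/usr/lib/spark',
            'SPARK_MASTER': '$MASTER',     # resolved from the environment at runtime
        }

    print(spark_bench_env('yarn-client'))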
Example #21
 def __init__(self, dist_config=None):
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'datafellas-notebook': 'datafellas-notebook-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
Example #22
    def __init__(self, hadoop_version, dist_config):
        self.dist_config = dist_config
        self.cpu_arch = utils.cpu_arch()

        self.resources = {"gobblin": "gobblin-hadoop_%s-%s" % (hadoop_version, self.cpu_arch)}
        self.verify_resources = utils.verify_resources(*self.resources.values())
Example #23
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            executor_mem = req_executor_mem

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = self.get_master()
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_DRIVER_MEMORY'] = driver_mem
            env['SPARK_EXECUTOR_MEMORY'] = executor_mem
            env['SPARK_HOME'] = spark_home
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # update spark config
        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(
            spark_conf, {
                r'.*spark.master *.*':
                'spark.master {}'.format(self.get_master()),
                r'.*spark.eventLog.enabled *.*':
                'spark.eventLog.enabled true',
                r'.*spark.eventLog.dir *.*':
                'spark.eventLog.dir hdfs:///user/ubuntu/directory',
            })
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        utils.re_edit_in_place(
            spark_env, {
                r'.*SPARK_DRIVER_MEMORY.*':
                'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
                r'.*SPARK_EXECUTOR_MEMORY.*':
                'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
                r'.*SPARK_LOG_DIR.*':
                'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
                r'.*SPARK_MASTER_IP.*':
                'SPARK_MASTER_IP={}'.format(local_ip),
                r'.*SPARK_WORKER_DIR.*':
                'SPARK_WORKER_DIR={}'.format(
                    self.dist_config.path('spark_work')),
            })

        # manage SparkBench
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if utils.cpu_arch() == 'ppc64le':
                sb_url = hookenv.config()['spark_bench_ppc64le']
            else:
                # TODO: may need more arch cases (go with x86 sb for now)
                sb_url = hookenv.config()['spark_bench_x86_64']

            Path(sb_dir).rmtree_p()
            fetcher = ArchiveUrlFetchHandler()
            fetcher.install(sb_url, '/home/ubuntu')

            # #####
            # Handle glob if we use a .tgz that doesn't expand to sb_dir
            # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
            # SparkBench expects to live in ~/spark-bench, so put it there
            # Path(sb_archive_dir).rename(sb_dir)
            # #####

            # comment out mem tunings (let them come from /etc/environment)
            sb_env = Path(sb_dir) / 'conf/env.sh'
            utils.re_edit_in_place(
                sb_env, {
                    r'^SPARK_DRIVER_MEMORY.*':
                    '# SPARK_DRIVER_MEMORY (use value from environment)',
                    r'^SPARK_EXECUTOR_MEMORY.*':
                    '# SPARK_EXECUTOR_MEMORY (use value from environment)',
                })
        else:
            Path(sb_dir).rmtree_p()
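The driver/executor memory handling above converts a percentage request into an absolute value only when spark runs locally. A self-contained sketch, with an illustrative helper name and an assumed 8 GiB of total RAM:

    # Sketch of the percentage-to-memory conversion used for driver/executor memory.
    def resolve_mem(requested, total_ram_mb, local_mode, default='1g'):
        if requested.endswith('%'):
            if not local_mode:
                return default                          # percentages only honored in local mode
            fraction = float(requested.strip('%')) / 100
            return '{}m'.format(int(total_ram_mb * fraction))
        return requested

    print(resolve_mem('50%', total_ram_mb=8192, local_mode=True))    # 4096m
    print(resolve_mem('50%', total_ram_mb=8192, local_mode=False))   # 1g
    print(resolve_mem('2g', total_ram_mb=8192, local_mode=True))     # 2g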
Example #24
 def __init__(self, dist_config=None):
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'kafka': 'kafka-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
Example #25
    def get_repo_url(self, bigtop_version=None):
        """
        Construct our package repo based on the given bigtop version.

        The package repository is dependent on the bigtop version and
        OS attributes. Construct an appropriate value to use as our site
        bigtop::bigtop_repo_uri param.

        Param string Bigtop version (e.g. '1.1.0', '1.2.0', '1.2.1', '1.3.0', or 'master')
        Return Bigtop repository URL
        Raise BigtopError if we have an unexpected version string.
        """
        bigtop_repo_url = None
        release_info = lsb_release()
        repo_arch = utils.cpu_arch().lower()

        dist_name = release_info['DISTRIB_ID'].lower()
        # NB: Set 16.04/xenial as defaults since that matches current bigtop
        # repos. Installation on non-LTS will work with these values.
        dist_release = "16.04"
        dist_series = "xenial"

        # Fail fast if we're not on ubuntu
        if dist_name != 'ubuntu':
            raise BigtopError(
                u"Charms currently only support Bigtop on Ubuntu.")

        if bigtop_version == '1.1.0':
            repo_url = ('http://bigtop-repos.s3.amazonaws.com/releases/'
                        '{version}/{dist}/{series}/{arch}')
            # NB: For 1.1.0, x86 must install from the trusty repo;
            # ppc64le only works from vivid.
            if repo_arch == "ppc64le":
                dist_series = "vivid"
                # 'le' and 'el' are swapped due to historical awfulness:
                #   https://lists.debian.org/debian-powerpc/2014/08/msg00042.html
                repo_arch = "ppc64el"
            else:
                dist_series = "trusty"
            # Substitute params.
            bigtop_repo_url = repo_url.format(
                version=self.bigtop_version,
                dist=dist_name,
                series=dist_series,
                arch=repo_arch
            )
        elif bigtop_version == '1.2.0':
            repo_url = ('http://bigtop-repos.s3.amazonaws.com/releases/'
                        '{version}/{dist}/{release}/{arch}')
            # Substitute params.
            bigtop_repo_url = repo_url.format(
                version=self.bigtop_version,
                dist=dist_name,
                release=dist_release,
                arch=repo_arch
            )
        elif bigtop_version == '1.2.1':
            # NB: Kafka is no longer served from official repos [1], nor are
            # there non-x86 repos available for 1.2.1. Handle these cases by
            # using the bigtop CI repository.
            # [1]: http://mail-archives.apache.org/mod_mbox/bigtop-announce/201708.mbox/thread
            if hookenv.metadata()['name'] == 'kafka' or repo_arch != "x86_64":
                bigtop_repo_url = ('https://ci.bigtop.apache.org/'
                                   'job/Bigtop-1.2.1/OS=ubuntu-16.04/'
                                   'lastSuccessfulBuild/artifact/output/apt')
            else:
                repo_url = ('http://repos.bigtop.apache.org/releases/'
                            '{version}/{dist}/{release}/{arch}')
                # Substitute params.
                bigtop_repo_url = repo_url.format(
                    version=self.bigtop_version,
                    dist=dist_name,
                    release=dist_release,
                    arch=repo_arch
                )
        elif bigtop_version == '1.3.0':
            # NB: Kafka is no longer served from official repos [1], nor are
            # there non-x86 repos available for 1.2.1. Handle these cases by
            # using the bigtop CI repository.
            # [1]: http://mail-archives.apache.org/mod_mbox/bigtop-announce/201708.mbox/thread
            #if hookenv.metadata()['name'] == 'kafka' or repo_arch != "x86_64":
            bigtop_repo_url = ('https://ci.bigtop.apache.org/job/Bigtop-1.3.0/'
                                   'DISTRO=ubuntu-16.04,PLATFORM=amd64-slave/'
                                   'lastSuccessfulBuild/artifact/output/apt/')
            # else:
            #     repo_url = ('http://repos.bigtop.apache.org/releases/'
            #                 '{version}/{dist}/{release}/{arch}')
            #     # Substitute params.
            #     bigtop_repo_url = repo_url.format(
            #         version=self.bigtop_version,
            #         dist=dist_name,
            #         release=dist_release,
            #         arch=repo_arch
            #     )
        elif bigtop_version == 'master':
            if repo_arch == "x86_64":
                bigtop_repo_url = ('https://ci.bigtop.apache.org/'
                                   'job/Bigtop-trunk-repos/'
                                   'OS=ubuntu-16.04,label=docker-slave/'
                                   'ws/output/apt')
            else:
                bigtop_repo_url = ('https://ci.bigtop.apache.org/'
                                   'job/Bigtop-trunk-repos/'
                                   'OS=ubuntu-16.04-{},label=docker-slave/'
                                   'ws/output/apt'.format(repo_arch))
        else:
            raise BigtopError(
                u"Unknown Bigtop version for repo_url: {}".format(bigtop_version))

        return bigtop_repo_url
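For the released-repo branches above, the URL is a straightforward substitution. A self-contained sketch for the 1.2.0 case; the function name and the default dist/release/arch values are illustrative:

    # Sketch of the 1.2.0 repo URL construction in get_repo_url.
    def bigtop_120_repo(dist='ubuntu', release='16.04', arch='x86_64'):
        template = ('http://bigtop-repos.s3.amazonaws.com/releases/'
                    '{version}/{dist}/{release}/{arch}')
        return template.format(version='1.2.0', dist=dist, release=release, arch=arch)

    print(bigtop_120_repo())
    # http://bigtop-repos.s3.amazonaws.com/releases/1.2.0/ubuntu/16.04/x86_64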
Example #26
    def install_benchmark(self):
        """
        Install and configure SparkBench.

        If config[spark_bench_enabled], fetch, install, and configure
        SparkBench on initial invocation. Subsequent invocations will skip the
        fetch/install, but will reconfigure SparkBench since we may need to
        adjust the data dir (eg: benchmark data is stored in hdfs when spark
        is in yarn mode; locally in all other execution modes).
        """
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/SparkBench'
        if install_sb:
            # Fetch/install on our first go-round, then set unit data so we
            # don't reinstall every time this function is called.
            if not unitdata.kv().get('spark_bench.installed', False):
                if utils.cpu_arch() == 'ppc64le':
                    sb_url = hookenv.config()['spark_bench_ppc64le']
                else:
                    # TODO: may need more arch cases (go with x86 sb for now)
                    sb_url = hookenv.config()['spark_bench_x86_64']

                Path(sb_dir).rmtree_p()
                au = ArchiveUrlFetchHandler()
                au.install(sb_url, '/home/ubuntu')

                # NB: This block is unused when using one of our sb tgzs. It
                # may come in handy if people want a tgz that does not expand
                # to our expected sb_dir.
                # #####
                # Handle glob if we use a .tgz that doesn't expand to sb_dir
                # sb_archive_dir = glob('/home/ubuntu/SparkBench*')[0]
                # SparkBench expects to live in ~/SparkBench, so put it there
                # Path(sb_archive_dir).rename(sb_dir)
                # #####

                # Ensure users in the spark group can write to any subdirectory
                # of sb_dir (spark needs to write benchmark output there when
                # running in local modes).
                host.chownr(Path(sb_dir), 'ubuntu', 'spark', chowntopdir=True)
                for r, d, f in os.walk(sb_dir):
                    os.chmod(r, 0o2775)

                unitdata.kv().set('spark_bench.installed', True)
                unitdata.kv().flush(True)

            # Configure the SB env every time this function is called.
            sb_conf = '{}/conf'.format(sb_dir)
            sb_env = Path(sb_conf) / 'env.sh'
            if not sb_env.exists():
                (Path(sb_conf) / 'env.sh.template').copy(sb_env)

            # NB: A few notes on configuring SparkBench:
            # 1. Input data has been pregenerated and packed into the tgz. All
            # spark cluster members will have this data locally, which enables
            # us to execute benchmarks in the absence of HDFS. When spark is in
            # yarn mode, we'll need to generate and store this data in HDFS
            # so nodemanagers can access it (NMs obviously won't have SB
            # installed locally). Set DATA_HDFS to a local dir or common HDFS
            # location depending on our spark execution mode.
            #
            # 2. SB tries to SSH to spark workers to purge vmem caches. This
            # isn't possible in containers, nor is it possible in our env
            # because we don't distribute ssh keys among cluster members.
            # Set MC_LIST to an empty string to prevent this behavior.
            #
            # 3. Throughout SB, HADOOP_HOME/bin is used as the prefix for the
            # hdfs command. Bigtop's hdfs lives at /usr/bin/hdfs, so set the
            # SB HADOOP_HOME accordingly (it's not used for anything else).
            #
            # 4. Use our MASTER envar to set the SparkBench SPARK_MASTER url.
            # It is updated every time we (re)configure spark.
            mode = hookenv.config()['spark_execution_mode']
            if mode.startswith('yarn'):
                sb_data_dir = "hdfs:///user/ubuntu/SparkBench"
            else:
                sb_data_dir = "file://{}".format(sb_dir)

            utils.re_edit_in_place(sb_env, {
                r'^DATA_HDFS *=.*': 'DATA_HDFS="{}"'.format(sb_data_dir),
                r'^DATASET_DIR *=.*': 'DATASET_DIR="{}/dataset"'.format(sb_dir),
                r'^MC_LIST *=.*': 'MC_LIST=""',
                r'.*HADOOP_HOME *=.*': 'HADOOP_HOME="/usr"',
                r'.*SPARK_HOME *=.*': 'SPARK_HOME="/usr/lib/spark"',
                r'^SPARK_MASTER *=.*': 'SPARK_MASTER="$MASTER"',
            })
        else:
            # config[spark_bench_enabled] is false; remove it
            Path(sb_dir).rmtree_p()
            unitdata.kv().set('spark_bench.installed', False)
            unitdata.kv().flush(True)
Example #27
 def __init__(self, dist_config=None):
     self.dist_config = dist_config or utils.DistConfig()
     self.resources = {
         'zeppelin': 'zeppelin-%s' % utils.cpu_arch(),
     }
Example #28
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            executor_mem = req_executor_mem

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = self.get_master()
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_DRIVER_MEMORY'] = driver_mem
            env['SPARK_EXECUTOR_MEMORY'] = executor_mem
            env['SPARK_HOME'] = spark_home
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # update spark config
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master *.*': 'spark.master {}'.format(self.get_master()),
            r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled true',
            r'.*spark.eventLog.dir *.*': 'spark.eventLog.dir hdfs:///user/ubuntu/directory',
            })
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        local_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*': 'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*': 'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
            r'.*SPARK_LOG_DIR.*': 'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
            r'.*SPARK_MASTER_IP.*': 'SPARK_MASTER_IP={}'.format(local_ip),
            r'.*SPARK_WORKER_DIR.*': 'SPARK_WORKER_DIR={}'.format(self.dist_config.path('spark_work')),
            })

        # manage SparkBench
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if utils.cpu_arch() == 'ppc64le':
                sb_url = hookenv.config()['spark_bench_ppc64le']
            else:
                # TODO: may need more arch cases (go with x86 sb for now)
                sb_url = hookenv.config()['spark_bench_x86_64']

            Path(sb_dir).rmtree_p()
            fetcher = ArchiveUrlFetchHandler()
            fetcher.install(sb_url, '/home/ubuntu')

            # #####
            # Handle glob if we use a .tgz that doesn't expand to sb_dir
            # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
            # SparkBench expects to live in ~/spark-bench, so put it there
            # Path(sb_archive_dir).rename(sb_dir)
            # #####

            # comment out mem tunings (let them come from /etc/environment)
            sb_env = Path(sb_dir) / 'conf/env.sh'
            utils.re_edit_in_place(sb_env, {
                r'^SPARK_DRIVER_MEMORY.*': '# SPARK_DRIVER_MEMORY (use value from environment)',
                r'^SPARK_EXECUTOR_MEMORY.*': '# SPARK_EXECUTOR_MEMORY (use value from environment)',
                })
        else:
            Path(sb_dir).rmtree_p()
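The utils.re_edit_in_place helper appears throughout these examples. Below is a minimal sketch of what a helper like it might do, inferred only from how it is called above (regex-keyed, whole-line replacement in place); it is an assumption, not the actual jujubigdata implementation, and it omits the append_non_matches option seen in later examples.

    import re

    def re_edit_in_place_sketch(path, subs):
        """Assumed behavior: replace any line matching a regex key in `subs`
        with the corresponding value, rewriting the file in place."""
        with open(path) as fd:
            lines = fd.read().splitlines()
        edited = []
        for line in lines:
            for pattern, replacement in subs.items():
                if re.search(pattern, line):
                    line = replacement
                    break
            edited.append(line)
        with open(path, 'w') as fd:
            fd.write('\n'.join(edited) + '\n')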
Example #29
    def __init__(self, dist_config):
        self.dist_config = dist_config
        self.cpu_arch = utils.cpu_arch()

        self.resources = {"livy": "livy-%s" % self.cpu_arch}
        self.verify_resources = utils.verify_resources(*self.resources.values())
Example #30
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        dc = self.dist_config
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            executor_mem = req_executor_mem

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = self.get_master()
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_DRIVER_MEMORY'] = driver_mem
            env['SPARK_EXECUTOR_MEMORY'] = executor_mem
            env['SPARK_HOME'] = spark_home

        events_dir = 'file://{}'.format(dc.path('spark_events'))
        if unitdata.kv().get('hdfs.available', False):
            prefix = dc.path('log_prefix')
            events_dir = dc.path('spark_events')
            events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))

        # update spark-defaults
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
            r'.*spark.eventLog.enabled .*': 'spark.eventLog.enabled true',
            r'.*spark.history.fs.logDirectory .*': 'spark.history.fs.logDirectory {}'.format(
                events_dir),
            r'.*spark.eventLog.dir .*': 'spark.eventLog.dir {}'.format(events_dir),
        }, append_non_matches=True)

        # update spark-env
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*': 'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*': 'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
            r'.*SPARK_LOG_DIR.*': 'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
            r'.*SPARK_WORKER_DIR.*': 'SPARK_WORKER_DIR={}'.format(self.dist_config.path('spark_work')),
        })

        # If zookeeper is available we should be in HA mode so we should not set the MASTER_IP
        if not unitdata.kv().get('zookeepers.available', False):
            master_ip = self.get_master_ip()
            utils.re_edit_in_place(spark_env, {
                r'.*SPARK_MASTER_IP.*': 'SPARK_MASTER_IP={}'.format(master_ip),
            })

        # manage SparkBench
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if not unitdata.kv().get('spark_bench.installed', False):
                if utils.cpu_arch() == 'ppc64le':
                    sb_url = hookenv.config()['spark_bench_ppc64le']
                else:
                    # TODO: may need more arch cases (go with x86 sb for now)
                    sb_url = hookenv.config()['spark_bench_x86_64']

                Path(sb_dir).rmtree_p()
                au = ArchiveUrlFetchHandler()
                au.install(sb_url, '/home/ubuntu')

                # #####
                # Handle glob if we use a .tgz that doesn't expand to sb_dir
                # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
                # SparkBench expects to live in ~/spark-bench, so put it there
                # Path(sb_archive_dir).rename(sb_dir)
                # #####

                # comment out mem tunings (let them come from /etc/environment)
                sb_env = Path(sb_dir) / 'conf/env.sh'
                utils.re_edit_in_place(sb_env, {
                    r'^SPARK_DRIVER_MEMORY.*': '# SPARK_DRIVER_MEMORY (use value from environment)',
                    r'^SPARK_EXECUTOR_MEMORY.*': '# SPARK_EXECUTOR_MEMORY (use value from environment)',
                })

                unitdata.kv().set('spark_bench.installed', True)
                unitdata.kv().flush(True)
        else:
            Path(sb_dir).rmtree_p()
            unitdata.kv().set('spark_bench.installed', False)
            unitdata.kv().flush(True)

        self.setup_init_scripts()
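The event-log directory selection above switches between a local file:// path and an HDFS path when hdfs.available is set. A self-contained sketch with hypothetical paths (the real values come from dist_config and may differ):

    # Sketch of the events_dir selection in configure(); paths are placeholders.
    def select_events_dir(spark_events, log_prefix, hdfs_available):
        if hdfs_available:
            return 'hdfs:///{}'.format(spark_events.replace(log_prefix, ''))
        return 'file://{}'.format(spark_events)

    print(select_events_dir('/srv/bigtop/user/ubuntu/spark-events',
                            '/srv/bigtop/', hdfs_available=True))
    # hdfs:///user/ubuntu/spark-events
    print(select_events_dir('/var/lib/spark/events', '/srv/bigtop/',
                            hdfs_available=False))
    # file:///var/lib/spark/events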
Example #31
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        dc = self.dist_config
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if self.is_spark_local():
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. Using 1g default.",
                    level=None)
        else:
            executor_mem = req_executor_mem

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = self.get_master()
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_DRIVER_MEMORY'] = driver_mem
            env['SPARK_EXECUTOR_MEMORY'] = executor_mem
            env['SPARK_HOME'] = spark_home

        events_dir = 'file://{}'.format(dc.path('spark_events'))
        if unitdata.kv().get('hdfs.available', False):
            prefix = dc.path('log_prefix')
            events_dir = dc.path('spark_events')
            events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))

        # update spark-defaults
        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*':
            'spark.master {}'.format(self.get_master()),
            r'.*spark.eventLog.enabled .*':
            'spark.eventLog.enabled true',
            r'.*spark.history.fs.logDirectory .*':
            'spark.history.fs.logDirectory {}'.format(events_dir),
            r'.*spark.eventLog.dir .*':
            'spark.eventLog.dir {}'.format(events_dir),
        },
                               append_non_matches=True)

        # update spark-env
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(
            spark_env, {
                r'.*SPARK_DRIVER_MEMORY.*':
                'SPARK_DRIVER_MEMORY={}'.format(driver_mem),
                r'.*SPARK_EXECUTOR_MEMORY.*':
                'SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
                r'.*SPARK_LOG_DIR.*':
                'SPARK_LOG_DIR={}'.format(self.dist_config.path('spark_logs')),
                r'.*SPARK_WORKER_DIR.*':
                'SPARK_WORKER_DIR={}'.format(
                    self.dist_config.path('spark_work')),
            })

        # If zookeeper is available we should be in HA mode so we should not set the MASTER_IP
        if not unitdata.kv().get('zookeepers.available', False):
            master_ip = self.get_master_ip()
            utils.re_edit_in_place(spark_env, {
                r'.*SPARK_MASTER_IP.*':
                'SPARK_MASTER_IP={}'.format(master_ip),
            })

        # manage SparkBench
        install_sb = hookenv.config()['spark_bench_enabled']
        sb_dir = '/home/ubuntu/spark-bench'
        if install_sb:
            if not unitdata.kv().get('spark_bench.installed', False):
                if utils.cpu_arch() == 'ppc64le':
                    sb_url = hookenv.config()['spark_bench_ppc64le']
                else:
                    # TODO: may need more arch cases (go with x86 sb for now)
                    sb_url = hookenv.config()['spark_bench_x86_64']

                Path(sb_dir).rmtree_p()
                au = ArchiveUrlFetchHandler()
                au.install(sb_url, '/home/ubuntu')

                # #####
                # Handle glob if we use a .tgz that doesn't expand to sb_dir
                # sb_archive_dir = glob('/home/ubuntu/spark-bench-*')[0]
                # SparkBench expects to live in ~/spark-bench, so put it there
                # Path(sb_archive_dir).rename(sb_dir)
                # #####

                # comment out mem tunings (let them come from /etc/environment)
                sb_env = Path(sb_dir) / 'conf/env.sh'
                utils.re_edit_in_place(
                    sb_env, {
                        r'^SPARK_DRIVER_MEMORY.*':
                        '# SPARK_DRIVER_MEMORY (use value from environment)',
                        r'^SPARK_EXECUTOR_MEMORY.*':
                        '# SPARK_EXECUTOR_MEMORY (use value from environment)',
                    })

                unitdata.kv().set('spark_bench.installed', True)
                unitdata.kv().flush(True)
        else:
            Path(sb_dir).rmtree_p()
            unitdata.kv().set('spark_bench.installed', False)
            unitdata.kv().flush(True)

        self.setup_init_scripts()
Example #32
 def __init__(self, dist_config):
     self.dist_config = dist_config
     self.resources = {
         'oozie': 'oozie-%s' % utils.cpu_arch(),
     }
     self.verify_resources = utils.verify_resources(*self.resources.values())
Example #33
    def get_repo_url(self, bigtop_version=None):
        """
        Construct our package repo based on the given bigtop version.

        The package repository is dependent on the bigtop version and
        OS attributes. Construct an appropriate value to use as our site
        bigtop::bigtop_repo_uri param.

        Param string Bigtop version (e.g. '1.1.0', '1.2.0', '1.2.1', or 'master')
        Return Bigtop repository URL
        Raise BigtopError if we have an unexpected version string.
        """
        bigtop_repo_url = None
        release_info = lsb_release()
        repo_arch = utils.cpu_arch().lower()

        dist_name = release_info['DISTRIB_ID'].lower()
        # NB: Set 16.04/xenial as defaults since that matches current bigtop
        # repos. Installation on non-LTS will work with these values.
        dist_release = "16.04"
        dist_series = "xenial"

        # Fail fast if we're not on ubuntu
        if dist_name != 'ubuntu':
            raise BigtopError(
                u"Charms currently only support Bigtop on Ubuntu.")

        if bigtop_version == '1.1.0':
            repo_url = ('http://bigtop-repos.s3.amazonaws.com/releases/'
                        '{version}/{dist}/{series}/{arch}')
            # NB: For 1.1.0, x86 must install from the trusty repo;
            # ppc64le only works from vivid.
            if repo_arch == "ppc64le":
                dist_series = "vivid"
                # 'le' and 'el' are swapped due to historical awfulness:
                #   https://lists.debian.org/debian-powerpc/2014/08/msg00042.html
                repo_arch = "ppc64el"
            else:
                dist_series = "trusty"
            # Substitute params.
            bigtop_repo_url = repo_url.format(
                version=self.bigtop_version,
                dist=dist_name,
                series=dist_series,
                arch=repo_arch
            )
        elif bigtop_version == '1.2.0':
            repo_url = ('http://bigtop-repos.s3.amazonaws.com/releases/'
                        '{version}/{dist}/{release}/{arch}')
            # Substitute params.
            bigtop_repo_url = repo_url.format(
                version=self.bigtop_version,
                dist=dist_name,
                release=dist_release,
                arch=repo_arch
            )
        elif bigtop_version == '1.2.1':
            # NB: Kafka is no longer served from official repos [1], nor are
            # there non-x86 repos available for 1.2.1. Handle these cases by
            # using the bigtop CI repository.
            # [1]: http://mail-archives.apache.org/mod_mbox/bigtop-announce/201708.mbox/thread
            if hookenv.metadata()['name'] == 'kafka' or repo_arch != "x86_64":
                bigtop_repo_url = ('https://ci.bigtop.apache.org/'
                                   'job/Bigtop-1.2.1/OS=ubuntu-16.04/'
                                   'lastSuccessfulBuild/artifact/output/apt')
            else:
                repo_url = ('http://repos.bigtop.apache.org/releases/'
                            '{version}/{dist}/{release}/{arch}')
                # Substitute params.
                bigtop_repo_url = repo_url.format(
                    version=self.bigtop_version,
                    dist=dist_name,
                    release=dist_release,
                    arch=repo_arch
                )
        elif bigtop_version == 'master':
            if repo_arch == "x86_64":
                bigtop_repo_url = ('https://ci.bigtop.apache.org/'
                                   'job/Bigtop-trunk-repos/'
                                   'OS=ubuntu-16.04,label=docker-slave/'
                                   'ws/output/apt')
            else:
                bigtop_repo_url = ('https://ci.bigtop.apache.org/'
                                   'job/Bigtop-trunk-repos/'
                                   'OS=ubuntu-16.04-{},label=docker-slave/'
                                   'ws/output/apt'.format(repo_arch))
        else:
            raise BigtopError(
                u"Unknown Bigtop version for repo_url: {}".format(bigtop_version))

        return bigtop_repo_url