Example #1
    def install(self):
        '''
        Fetch resources
        '''
        self.dist_config.add_users()
        self.dist_config.add_dirs()

        result = resource_get('tomee')
        if not result:
            log("Failed to fetch TomEE resource")
            return False

        unitdata.kv().set("tomeetarball", result)
        log("TomEE tarball path is {}".format(result))
        tomee_install_dir = self.dist_config.path('tomee_dir')
        with chdir(tomee_install_dir):
            utils.run_as('tomcat', 'tar', '-zxvf', '{}'.format(result))

        tomee_dirs = [f for f in os.listdir(tomee_install_dir)
                      if f.startswith('apache-tomee')]
        catalina_home = os.path.join(tomee_install_dir, tomee_dirs[0])
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['CATALINA_HOME'] = catalina_home
        unitdata.kv().set("catalina_home", catalina_home)
        self.open_ports()
        return True
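A note on the helper used throughout these examples: run_as appears to come from jujubigdata.utils and executes a command as another system user, passing each argument as a separate argv element. A minimal, hypothetical sketch of the same unpack step outside any charm class (the user, tarball path, and target directory are placeholders, not values from a real deployment):

    # Sketch only, assuming run_as is jujubigdata.utils.run_as as in the
    # Apache Bigtop charms these snippets are drawn from.
    from jujubigdata import utils

    # Unpack a (hypothetical) tarball into a (hypothetical) install dir
    # as the 'tomcat' user.
    utils.run_as('tomcat', 'tar', '-zxf', '/tmp/tomee.tar.gz',
                 '-C', '/opt/tomee')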
Example #2
    def setup_init_scripts(self):
        if host.init_is_systemd():
            template_path = '/etc/systemd/system/zeppelin.service'
            template_name = 'systemd.conf'
        else:
            template_path = '/etc/init/zeppelin.conf'
            template_name = 'upstart.conf'
        if os.path.exists(template_path):
            template_path_backup = "{}.backup".format(template_path)
            if os.path.exists(template_path_backup):
                os.remove(template_path_backup)
            os.rename(template_path, template_path_backup)

        render(
            template_name,
            template_path,
            context={
                'zeppelin_home': self.dist_config.path('zeppelin'),
                'zeppelin_conf': self.dist_config.path('zeppelin_conf')
            },
        )

        if host.init_is_systemd():
            utils.run_as('root', 'systemctl', 'enable', 'zeppelin.service')
            utils.run_as('root', 'systemctl', 'daemon-reload')
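The render() call above is the charmhelpers templating helper (charmhelpers.core.templating.render). A hedged, standalone sketch of the same idea with hypothetical template and context values:

    # Sketch only: 'systemd.conf' is assumed to live in the charm's
    # templates/ directory; the paths in the context are placeholders.
    from charmhelpers.core.templating import render

    render(
        'systemd.conf',                            # source template
        '/etc/systemd/system/zeppelin.service',    # rendered target
        context={
            'zeppelin_home': '/usr/lib/zeppelin',
            'zeppelin_conf': '/etc/zeppelin/conf',
        },
    )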
Example #3
    def configure_remote_db(self, mysql):
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        jdbc_url = \
            "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
                mysql.host(), mysql.port(), mysql.database()
            )
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = jdbc_url
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = \
                "com.mysql.jdbc.Driver"

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
        })

        # Now that we have db connection info, init our schema (only once)
        remote_db = hookenv.remote_service_name()
        if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
            tool_path = "{}/bin/schematool".format(
                self.dist_config.path('hive'))
            utils.run_as(
                'ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
            unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
            unitdata.kv().flush(True)
Example #4
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # put our jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(spark_home))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar, '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            print ("File exists")

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = hookenv.config('spark_execution_mode')
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_HOME'] = spark_home
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # update spark config
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled    true',
            r'.*spark.eventLog.dir *.*': 'spark.eventLog.dir    hdfs:///user/ubuntu/directory',
            })
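utils.re_edit_in_place, used above on spark-defaults.conf, takes a mapping of line-matching regexes to replacement lines. A small illustrative sketch under that assumption (file path and settings are placeholders):

    # Sketch: each line of the file matching a key regex is replaced by the
    # corresponding value; lines that match nothing are left untouched.
    from jujubigdata import utils

    utils.re_edit_in_place('/etc/spark/conf/spark-defaults.conf', {
        r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled    true',
        r'.*spark.eventLog.dir *.*':
            'spark.eventLog.dir    hdfs:///user/ubuntu/directory',
    })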
Example #5
    def configure_remote_db(self, mysql):
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        jdbc_url = \
            "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
                mysql.host(), mysql.port(), mysql.database()
            )
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = jdbc_url
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = \
                "com.mysql.jdbc.Driver"

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(
            hive_env, {
                r'.*export HIVE_AUX_JARS_PATH *=.*':
                ('export HIVE_AUX_JARS_PATH='
                 '/usr/share/java/mysql-connector-java.jar'),
            })

        # Now that we have db connection info, init our schema (only once)
        remote_db = hookenv.remote_service_name()
        if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
            tool_path = "{}/bin/schematool".format(
                self.dist_config.path('hive'))
            utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType',
                         'mysql')
            unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
            unitdata.kv().flush(True)
    def setup_hive_config(self):
        '''
        copy the default configuration files to hive_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('hive') / 'conf'
        hive_conf = self.dist_config.path('hive_conf')
        hive_conf.rmtree_p()
        default_conf.copytree(hive_conf)

        # Configure immutable bits
        hive_bin = self.dist_config.path('hive') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hive_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hive_bin])
            env['HIVE_CONF_DIR'] = self.dist_config.path('hive_conf')

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') / 'hive-env.sh.template').copy(hive_env)

        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        if not hive_site.exists():
            (self.dist_config.path('hive_conf') / 'hive-default.xml.template').copy(hive_site)
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            # TODO (kwm): we should be able to export java.io.tmpdir so these 4 aren't needed
            props['hive.exec.local.scratchdir'] = "/tmp/hive"
            props['hive.downloaded.resources.dir'] = "/tmp/hive_resources"
            props['hive.querylog.location'] = "/tmp/hive"
            props['hive.server2.logging.operation.log.location'] = "/tmp/hive"
            ####

        # create hdfs storage space
        utils.run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p', '/user/hive/warehouse')
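Several examples edit Hadoop-style XML property files through utils.xmlpropmap_edit_in_place; judging from the calls above it is a context manager that exposes the <name>/<value> pairs as a dict and writes them back on exit. A hedged sketch (file path and property are placeholders):

    # Sketch only, assuming the dict-like behaviour shown in the examples.
    from jujubigdata import utils

    with utils.xmlpropmap_edit_in_place('/etc/hive/conf/hive-site.xml') as props:
        props['hive.exec.local.scratchdir'] = '/tmp/hive'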
Example #7
 def stop(self):
     hookenv.log("Stopping HUE and Supervisor process")
     try:
         utils.run_as('hue', 'pkill', '-9', 'supervisor')
         utils.run_as('hue', 'pkill', '-9', 'hue')
     except subprocess.CalledProcessError:
         return
    def configure_hive(self, mysql):
        config = hookenv.config()
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props[
                'javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
                    mysql.host(), mysql.port(), mysql.database())
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props[
                'javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
            props[
                'hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[
                    self.cpu_arch]

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(
            hive_env, {
                r'.*export HADOOP_HEAPSIZE *=.*':
                'export HADOOP_HEAPSIZE=%s' % config['heap'],
                r'.*export HIVE_AUX_JARS_PATH *=.*':
                'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
            })

        # Now that we have db connection info, init our schema (only once)
        if not unitdata.kv().get('hive.schema.initialized'):
            utils.run_as('hive', 'schematool', '-initSchema', '-dbType',
                         'mysql')
            unitdata.kv().set('hive.schema.initialized', True)
Example #9
 def stop(self):
     zeppelin_conf = self.dist_config.path('zeppelin_conf')
     zeppelin_home = self.dist_config.path('zeppelin')
     # TODO: try/catch existence of zeppelin-daemon.sh. Stop hook will fail
     # if we try to destroy a deployment that didn't finish installing.
     utils.run_as('ubuntu',
                  '{}/bin/zeppelin-daemon.sh'.format(zeppelin_home),
                  '--config', zeppelin_conf, 'stop')
 def trigger_puppet(self):
     # TODO need to either manage the apt keys from Juju or
     # update upstream Puppet recipes to install them along with apt source
     # puppet apply needs to be ran where recipes were unpacked
     with chdir("{0}/{1}".format(self.bigtop_dir, self.bigtop_version)):
         utils.run_as('root', 'puppet', 'apply', '-d',
                      '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                      'bigtop-deploy/puppet/manifests/site.pp')
Example #11
 def soft_restart(self):
     hookenv.log("Restarting HUE with Supervisor process")
     try:
         utils.run_as('hue', 'pkill', '-9', 'hue')
     except subprocess.CalledProcessError:
         hookenv.log("Problem with Supervisor process, doing hard HUE restart")
         self.stop()
         self.start()
 def init_zkrest(self):
     # Zookeeper user needs to compile the rest contrib server.
     # So zookeeper needs to:
     # 1. Have a home dir for ant cache to exist
     # 2. Write to the /usr/lib/zookeeper
     chownr(self.dist_config.path('zookeeper'), 'zookeeper', 'zookeeper', chowntopdir=True)
     with chdir(self.dist_config.path('zookeeper')):
         utils.run_as('zookeeper', 'ant')
     unitdata.kv().set('rest.initialised', True)
 def setup_hdfs_logs(self):
     # create hdfs storage space for history server
     dc = self.dist_config
     events_dir = dc.path('spark_events')
     events_dir = 'hdfs://{}'.format(events_dir)
     utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
     utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:spark',
                  events_dir)
     return events_dir
 def install_puppet_modules(self):
     # Install required modules
     charm_dir = Path(hookenv.charm_dir())
     for module in sorted(glob('resources/puppet-modules/*.tar.gz')):
         # Required modules are included in the charms to support network
         # restricted deployment. Using force install / ignore deps prevents
         # puppet from calling out to https://forgeapi.puppetlabs.com.
         utils.run_as('root', 'puppet', 'module', 'install',
                      '--force', '--ignore-dependencies',
                      charm_dir / module)
Example #17
    def configure_hadoop(self):
        # create hdfs storage space
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/zeppelin')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', 'zeppelin', '/user/zeppelin')

        # If spark is ready, let it handle the spark_master_url. Otherwise,
        # zepp is in local mode; set it to yarn-client since hadoop is here.
        if not is_state('spark.ready'):
            self._add_override('zeppelin::server::spark_master_url', 'yarn-client')
            self.trigger_bigtop()
Example #18
    def deploy(self):
        '''
        Just install the two deb packages. Should throw an exception in case
        installation fails.
        '''
        udedeb = unitdata.kv().get("udedeb")
        utils.run_as('root', 'dpkg', '-i', '{}'.format(udedeb))

        mariadeb = unitdata.kv().get("mariadeb")
        utils.run_as('root', 'dpkg', '-i', '{}'.format(mariadeb))
 def start(self):
     # Start if we're not already running. We currently don't have any
     # runtime config options, so no need to restart when hooks fire.
     if not utils.jps("livy"):
         livy_log = self.dist_config.path("livy_logs") + "livy-server.log"
         livy_home = self.dist_config.path("livy")
         # chdir here because things like zepp tutorial think ZEPPELIN_HOME
         # is wherever the daemon was started from.
         os.chdir(livy_home)
         utils.run_as("hue", "./bin/livy-server", "2>&1", livy_log, "&")
 def start(self):
     # Start if we're not already running. We currently don't have any
     # runtime config options, so no need to restart when hooks fire.
     if not utils.jps("zeppelin"):
         zeppelin_conf = self.dist_config.path('zeppelin_conf')
         zeppelin_home = self.dist_config.path('zeppelin')
         # chdir here because things like zepp tutorial think ZEPPELIN_HOME
         # is wherever the daemon was started from.
         os.chdir(zeppelin_home)
         utils.run_as('ubuntu', '{}/bin/zeppelin-daemon.sh'.format(zeppelin_home), '--config', zeppelin_conf, 'start')
    def configure(self, available_hosts):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is there
          * Resource manager exists aka YARN is ready

        Both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        """

        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        self.install_benchmark()

        hosts = {
            'spark': available_hosts['spark-master'],
        }

        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        roles = self.get_roles()

        override = {
            'spark::common::master_url': self.get_master_url(available_hosts['spark-master']),
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
        # There is a race condition here.
        # The worker role will not start the first time we trigger puppet apply.
        # The exception in /var/log/spark:
        # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
        # The master url is not set at the time the worker starts the first time.
        # TODO(kjackal): ...do the needed... (investigate, debug, submit patch)
        bigtop.trigger_puppet()
        if 'namenode' not in available_hosts:
            # Make sure users other than spark can access the events logs dir and run jobs
            utils.run_as('root', 'chmod', '777', dc.path('spark_events'))
    def configure_hadoop(self):
        # create hdfs storage space
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/zeppelin')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', 'zeppelin', '/user/zeppelin')

        # If spark is ready, let configure_spark() trigger bigtop. Otherwise,
        # put our spark in yarn-client mode since hadoop is here.
        if not is_state('spark.ready'):
            self._add_override('spark::common::master_url', 'yarn-client')
            self._add_override('zeppelin::server::spark_master_url', 'yarn-client')
            self.trigger_bigtop()
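The start() methods above guard on utils.jps, which (as used here) returns a truthy value when a Java process matching the given name is running. A hedged sketch of the same guard with placeholder paths:

    # Sketch: only start the daemon when no matching Java process exists.
    from jujubigdata import utils

    if not utils.jps('zeppelin'):
        utils.run_as('ubuntu', '/usr/lib/zeppelin/bin/zeppelin-daemon.sh',
                     '--config', '/etc/zeppelin/conf', 'start')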
Example #23
 def start(self):
     # Start if we're not already running. We currently don't have any
     # runtime config options, so no need to restart when hooks fire.
     if not utils.jps("zeppelin"):
         zeppelin_conf = self.dist_config.path('zeppelin_conf')
         zeppelin_home = self.dist_config.path('zeppelin')
         # chdir here because things like zepp tutorial think ZEPPELIN_HOME
         # is wherever the daemon was started from.
         os.chdir(zeppelin_home)
         utils.run_as('ubuntu',
                      '{}/bin/zeppelin-daemon.sh'.format(zeppelin_home),
                      '--config', zeppelin_conf, 'start')
    def run_smoke_tests(self, smoke_components=None, smoke_env=None):
        """
        Run the Bigtop smoke tests for given components using the gradle
        wrapper script.

        :param list smoke_components: Bigtop components to smoke test
        :param dict smoke_env: Dict of required environment variables (merged
            with /etc/environment)
        """
        if not is_state('bigtop.available'):
            hookenv.log('Bigtop is not ready to run smoke tests')
            return None
        if not smoke_components:
            hookenv.log('Missing Bigtop smoke test component list')
            return None

        # We always need TERM and JAVA_HOME; merge with any user provided dict
        subprocess_env = {'TERM': 'dumb', 'JAVA_HOME': java_home()}
        if isinstance(smoke_env, dict):
            subprocess_env.update(smoke_env)

        # Ensure the base dir is owned by ubuntu so we can create a .gradle dir.
        chownr(self.bigtop_base, 'ubuntu', 'ubuntu', chowntopdir=True)

        # Gradle doesn't honor env proxies; check for either http* or HTTP* and
        # set cli args as needed.
        http_url = os.environ.get('http_proxy', os.environ.get('HTTP_PROXY'))
        https_url = os.environ.get('https_proxy', os.environ.get('HTTPS_PROXY'))
        proxy_args = []
        if http_url:
            parsed_url = urlparse(http_url)
            proxy_args += ['-Dhttp.proxyHost={}'.format(parsed_url.hostname),
                           '-Dhttp.proxyPort={}'.format(parsed_url.port)]
        if https_url:
            parsed_url = urlparse(https_url)
            proxy_args += ['-Dhttps.proxyHost={}'.format(parsed_url.hostname),
                           '-Dhttps.proxyPort={}'.format(parsed_url.port)]

        # Bigtop can run multiple smoke tests at once; construct the right args.
        comp_args = ['bigtop-tests:smoke-tests:%s:test' % c for c in smoke_components]
        gradlew_args = ['-Psmoke.tests', '--info'] + proxy_args + comp_args

        hookenv.log('Bigtop smoke test environment: {}'.format(subprocess_env))
        hookenv.log('Bigtop smoke test args: {}'.format(gradlew_args))
        with chdir(self.bigtop_base):
            try:
                utils.run_as('ubuntu', './gradlew', *gradlew_args,
                             env=subprocess_env)
                smoke_out = 'success'
            except subprocess.CalledProcessError as e:
                smoke_out = e.output
        return smoke_out
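A hypothetical invocation of run_smoke_tests; the component names and the extra environment variable are illustrative only, and `bigtop` stands for an instance of the class defining the method:

    # Sketch: smoke test two hypothetical components, merging one extra
    # variable into the TERM/JAVA_HOME defaults set by the method.
    from charmhelpers.core import hookenv

    result = bigtop.run_smoke_tests(
        smoke_components=['hdfs', 'yarn'],
        smoke_env={'HADOOP_CONF_DIR': '/etc/hadoop/conf'},
    )
    # 'success' on a clean run, otherwise the captured gradle output.
    hookenv.log('smoke test result: {}'.format(result))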
Example #26
def install_burrow():
    # Install dep https://github.com/golang/dep
    url = "https://raw.githubusercontent.com/golang/dep/master/install.sh"
    wget.download(url=url, out='/home/ubuntu/dep-installer.sh')
    os.chmod('/home/ubuntu/dep-installer.sh', 0o755)
    try:
        #output = run('/home/ubuntu/dep-installer.sh')
        #output.check_returncode()
        utils.run_as('root', '/home/ubuntu/dep-installer.sh')
    except CalledProcessError as e:
        log(e)
        status_set('blocked', 'Failed to install dep.')
        return

    previous_wd = os.getcwd()
    go_env = go_environment()
    utils.run_as('ubuntu', 'go', 'get', 'github.com/linkedin/Burrow')
    os.chdir(go_env['GOPATH'] + '/src/github.com/linkedin/Burrow')
    utils.run_as('ubuntu', 'dep', 'ensure')
    utils.run_as('ubuntu', 'go', 'install')
    dirs = [
        '/home/ubuntu/burrow', '/home/ubuntu/burrow/log',
        '/home/ubuntu/burrow/config'
    ]
    for dir in dirs:
        if not os.path.exists(dir):
            os.makedirs(dir)
    os.chdir(previous_wd)
    set_flag('burrow.installed')
Example #27
 def start(self):
     """
     Override start to use InsightEdge's wrapper.
     """
     # Start if we're not already running. We currently don't have any
     # runtime config options, so no need to restart when hooks fire.
     if not utils.jps("zeppelin"):
         ie_home = self.dist_config.path('insightedge')
         zeppelin_home = self.dist_config.path('zeppelin')
         # chdir here because things like zepp tutorial think ZEPPELIN_HOME
         # is wherever the daemon was started from.
         with host.chdir(zeppelin_home):
             utils.run_as('ubuntu',
                          '{}/sbin/start-zeppelin.sh'.format(ie_home))
         # wait up to 30s for API to start, lest requests fail
         self.wait_for_api(30)
Example #28
    def install(self):
        '''
        Perform initial one-time setup, work around upstream bugs, and
        trigger puppet.
        '''
        # Dirs are handled by the bigtop deb, so no need to call out to
        # dist_config to do that work.  However, we want to adjust the
        # groups for the `ubuntu` user for better interaction with Juju.
        self.dist_config.add_users()

        # Set ports based on layer.yaml options
        self._add_override('zeppelin::server::server_port',
                           self.dist_config.port('zeppelin'))
        self._add_override('zeppelin::server::web_socket_port',
                           self.dist_config.port('zeppelin_web'))

        # Default spark to local mode on initial install. This will be
        # reconfigured if/when hadoop or spark relations are made.
        self._add_override('zeppelin::server::spark_master_url', 'local[*]')

        ##########
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf',
                     zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ##########

        self.trigger_bigtop()

        ##########
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
        ##########

        ##########
        # BUG: BIGTOP-2154
        # The zep deb depends on spark-core and spark-python. However, because
        # of the unholy requirement to have hive tightly coupled to spark,
        # we need to ensure spark-datanucleus is installed. Do this after the
        # initial install so the bigtop repo is available to us.
        utils.run_as('root', 'apt-get', 'install', '-qy', 'spark-datanucleus')
Example #29
def update_zoo_cfg(zkid=getid(local_unit()), ip=unit_private_ip(), remove=False):
    '''
    Configuration for a Zookeeper quorum requires listing all unique servers
    (server.X=<ip>:2888:3888) in the zoo.cfg. This function updates server.X
    entries and restarts the zookeeper service.
    '''
    zookeeper_cfg = "{}/zoo.cfg".format(os.environ.get('ZOOCFGDIR', '/etc/zookeeper/conf'))
    key = "server.{}".format(zkid)
    value = "={}:2888:3888".format(ip)
    if remove:
        removeKV(zookeeper_cfg, key)
        return
    addKV(zookeeper_cfg, key, value)

    # restart the zk server after altering zoo.cfg
    zookeeper_bin = os.environ.get('ZOO_BIN_DIR', '/usr/lib/zookeeper/bin')
    utils.run_as('zookeeper', '{}/zkServer.sh'.format(zookeeper_bin), 'restart')
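A worked example of the call above with hypothetical values: for a unit whose id resolves to 2 and whose private address is 10.0.0.7, update_zoo_cfg adds the line server.2=10.0.0.7:2888:3888 to zoo.cfg and restarts ZooKeeper; with remove=True it deletes that entry and returns without restarting.

    # Sketch with placeholder values, not data from a real unit.
    update_zoo_cfg(zkid=2, ip='10.0.0.7')                # adds server.2=10.0.0.7:2888:3888
    update_zoo_cfg(zkid=2, ip='10.0.0.7', remove=True)   # removes the entry, no restart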
Example #30
    def install(self, force=False):
        if not force and self.is_installed():
            return
        self.dist_config.add_dirs()
        self.dist_config.add_packages()
        jujuresources.install(self.resources['spark'],
                              destination=self.dist_config.path('spark'),
                              skip_top_level=True)
        self.setup_spark_config()
        self.install_demo()

        # create hdfs storage space
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/ubuntu/directory')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop', '/user/ubuntu/directory')

        unitdata.kv().set('spark.installed', True)
        unitdata.kv().flush(True)
Example #31
 def stop(self):
     """
     Override stop to use InsightEdge's wrapper.
     """
     # Only stop if we're actually running. We currently don't have any
     # runtime config options, so no need to restart when hooks fire.
     if utils.jps("zeppelin"):
         ie_home = self.dist_config.path('insightedge')
         zeppelin_home = self.dist_config.path('zeppelin')
         # chdir here because things like zepp tutorial think ZEPPELIN_HOME
         # is wherever the daemon was started from.
         with host.chdir(zeppelin_home):
             utils.run_as('ubuntu',
                          '{}/sbin/stop-zeppelin.sh'.format(ie_home))
         # wait for the process to stop, since issuing a start while the
         # process is still running (i.e., restart) could cause it to not
         # start up again
         self.wait_for_stop(30)
    def install(self):
        self.dist_config.add_dirs()

        # Copy our start/stop scripts (preserving attrs) to $HOME
        start_source = 'scripts/start_notebook.sh'
        Path(start_source).chmod(0o755)
        Path(start_source).chown('ubuntu', 'hadoop')

        stop_source = 'scripts/stop_notebook.sh'
        Path(stop_source).chmod(0o755)
        Path(stop_source).chown('ubuntu', 'hadoop')

        target = os.environ.get('HOME', '/home/ubuntu')
        Path(start_source).copy2(target)
        Path(stop_source).copy2(target)

        # Create an IPython profile
        utils.run_as("ubuntu", 'ipython', 'profile', 'create', 'pyspark')
    def trigger_puppet(self):
        """
        Trigger Puppet to install the desired components.
        """
        java_version = unitdata.kv().get('java_version', '')
        if java_version.startswith('1.7.') and len(get_fqdn()) > 64:
            # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
            # java to do a hostname lookup on clouds that have >64 char FQDNs
            # (e.g., gce). Attempt to work around this by putting the (hopefully
            # short) hostname into /etc/hosts so that it will (hopefully) be
            # used instead (see http://paste.ubuntu.com/16230171/).
            # NB: do this before the puppet apply, which may call java stuffs
            # like format namenode, which will fail if we don't get this fix
            # down early.
            short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
            private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
            if short_host and private_ip:
                utils.update_kv_host(private_ip, short_host)
                utils.manage_etc_hosts()

        # puppet args are bigtop-version dependent
        if self.bigtop_version == '1.1.0':
            puppet_args = [
                '-d',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests/site.pp'
            ]
        else:
            puppet_args = [
                '-d',
                '--parser=future',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests'
            ]

        # puppet apply runs from the root of the bigtop release source
        with chdir(self.bigtop_base):
            utils.run_as('root', 'puppet', 'apply', *puppet_args)

        # Do any post-puppet config on the generated config files.
        utils.re_edit_in_place('/etc/default/bigtop-utils', {
            r'(# )?export JAVA_HOME.*': 'export JAVA_HOME={}'.format(
                java_home()),
        })
Example #35
    def install(self, force=False):
        if not force and self.is_installed():
            return

        # download Bigtop release; unpack the recipes
        bigtop_dir = '/home/ubuntu/bigtop.release'
        if not unitdata.kv().get('bigtop-release.installed', False):
            Path(bigtop_dir).rmtree_p()
            au = ArchiveUrlFetchHandler()
            au.install(bigtop_dir, '/home/ubuntu')

            unitdata.kv().set('bigtop-release.installed', True)
            unitdata.kv().flush(True)

        hiera_dst = self.dist_config.bigtop_hiera('path')
        hiera_conf = self.dist_config.bigtop_hiera('source')
        utils.re_edit_in_place(
            hiera_conf, {
                r'.*:datadir.*': '{0}/{1}'.format(hiera_dst, hiera_conf),
            })

        # generate site.yaml. Something like this would do
        setup_bigtop_config(
            bigtop_dir,
            "{0}/hieradata/site.yaml".format(os.path.dirname(hiera_conf)))

        # install required puppet modules
        try:
            utils.run_as('root', 'puppet', 'module', 'install',
                         'puppetlabs-stdlib', 'puppetlabs-apt')
        except CalledProcessError:
            pass  # All modules are set

        try:
            utils.run_as(
                'root', 'puppet', 'apply', '-d',
                '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                'bigtop-deploy/puppet/manifests/site.pp')
        except CalledProcessError:
            pass  # Everything seems to be fine

        unitdata.kv().set('bigtop.installed', True)
        unitdata.kv().flush(True)
    def configure_flume(self):
        config = hookenv.config()        
        templating.render(
            source='flume.conf.j2',
            target=self.dist_config.path('flume_conf') / 'flume.conf',
            context={'dist_config': self.dist_config, 'config': config})

        flume_bin = self.dist_config.path('flume') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if flume_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], flume_bin])
            env['FLUME_CONF_DIR'] = self.dist_config.path('flume_conf')
            env['FLUME_CLASSPATH'] = self.dist_config.path('flume') / 'lib'
            env['FLUME_HOME'] = self.dist_config.path('flume')

        # flume_env = self.dist_config.path('flume_conf') / 'flume-env.sh'
        # utils.re_edit_in_place(flume_env, {
        # })
        utils.run_as('flume', 'hdfs', 'dfs', '-mkdir', '-p', '/user/flume')
Example #37
    def run(self, user, command, *args, **kwargs):
        """
        Run a Hadoop command as the given user.

        :param str command: Command to run, prefixed with `bin/` or `sbin/`
        :param list args: Additional args to pass to the command
        """
        return utils.run_as(user,
                            self.dist_config.path('hadoop') / command,
                            *args, **kwargs)
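A hypothetical use of the run() wrapper above, following the docstring's bin/ and sbin/ convention; `self` stands for an instance of the class that defines it:

    # Sketch only: list the HDFS root as the 'hdfs' user, then (hypothetically)
    # invoke an sbin/ script the same way.
    self.run('hdfs', 'bin/hdfs', 'dfs', '-ls', '/')
    self.run('hdfs', 'sbin/start-dfs.sh')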
Example #38
    def run(self, user, command, *args, **kwargs):
        """
        Run a Hadoop command as the given user.

        :param str command: Command to run, prefixed with `bin/` or `sbin/`
        :param list args: Additional args to pass to the command
        """
        return utils.run_as(user,
                            self.dist_config.path('hadoop') / command, *args,
                            **kwargs)
Example #39
def update_zoo_cfg(
        zkid=getid(local_unit()), ip=unit_private_ip(), remove=False):
    '''
    Configuration for a Zookeeper quorum requires listing all unique servers
    (server.X=<ip>:2888:3888) in the zoo.cfg. This function updates server.X
    entries and restarts the zookeeper service.
    '''
    zookeeper_cfg = "{}/zoo.cfg".format(
        os.environ.get('ZOOCFGDIR', '/etc/zookeeper/conf'))
    key = "server.{}".format(zkid)
    value = "={}:2888:3888".format(ip)
    if remove:
        removeKV(zookeeper_cfg, key)
        return
    addKV(zookeeper_cfg, key, value)

    # restart the zk server after altering zoo.cfg
    zookeeper_bin = os.environ.get('ZOO_BIN_DIR', '/usr/lib/zookeeper/bin')
    utils.run_as('zookeeper', '{}/zkServer.sh'.format(zookeeper_bin),
                 'restart')
Example #40
    def setup_init_scripts(self):
        templates_list = ['history', 'master', 'slave']
        for template in templates_list:
            if host.init_is_systemd():
                template_path = '/etc/systemd/system/spark-{}.service'.format(
                    template)
            else:
                template_path = '/etc/init/spark-{}.conf'.format(template)
            if os.path.exists(template_path):
                os.remove(template_path)

        self.stop()

        mode = hookenv.config()['spark_execution_mode']
        templates_list = ['history']
        if mode == 'standalone':
            templates_list.append('master')
            templates_list.append('slave')

        for template in templates_list:
            template_name = '{}-upstart.conf'.format(template)
            template_path = '/etc/init/spark-{}.conf'.format(template)
            if host.init_is_systemd():
                template_name = '{}-systemd.conf'.format(template)
                template_path = '/etc/systemd/system/spark-{}.service'.format(
                    template)

            render(
                template_name,
                template_path,
                context={
                    'spark_bin': self.dist_config.path('spark'),
                    'master': self.get_master()
                },
            )
            if host.init_is_systemd():
                utils.run_as('root', 'systemctl', 'enable',
                             'spark-{}.service'.format(template))

        if host.init_is_systemd():
            utils.run_as('root', 'systemctl', 'daemon-reload')
    def configure_events_dir(self, mode):
        """
        Create directory for spark event data.

        This directory is used by workers to store event data. It is also read
        by the history server when displaying event information.

        :param string mode: Spark execution mode to determine the dir location.
        """
        dc = self.dist_config

        # Directory needs to be 777 so non-spark users can write job history
        # there. It needs to be g+s (HDFS is g+s by default) so all entries
        # are readable by spark (in the spark group). It needs to be +t so
        # users cannot remove files they don't own.
        if mode.startswith('yarn'):
            events_dir = 'hdfs://{}'.format(dc.path('spark_events'))
            utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
            utils.run_as('hdfs', 'hdfs', 'dfs', '-chmod', '1777', events_dir)
            utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:spark',
                         events_dir)
        else:
            events_dir = dc.path('spark_events')
            events_dir.makedirs_p()
            events_dir.chmod(0o3777)
            host.chownr(events_dir, 'ubuntu', 'spark', chowntopdir=True)
Example #42
    def configure_events_dir(self, mode):
        """
        Create directory for spark event data.

        This directory is used by workers to store event data. It is also read
        by the history server when displaying event information.

        :param string mode: Spark execution mode to determine the dir location.
        """
        dc = self.dist_config

        # Directory needs to be 777 so non-spark users can write job history
        # there. It needs to be g+s (HDFS is g+s by default) so all entries
        # are readable by spark (in the spark group). It needs to be +t so
        # users cannot remove files they don't own.
        if mode.startswith('yarn'):
            events_dir = 'hdfs://{}'.format(dc.path('spark_events'))
            utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
            utils.run_as('hdfs', 'hdfs', 'dfs', '-chmod', '1777', events_dir)
            utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:spark',
                         events_dir)
        else:
            events_dir = dc.path('spark_events')
            events_dir.makedirs_p()
            events_dir.chmod(0o3777)
            host.chownr(events_dir, 'ubuntu', 'spark', chowntopdir=True)
def get_fqdn():
    """
    Return the FQDN as known by 'facter'.

    This must be run with utils.run_as to ensure any /etc/environment changes
    are honored. We may have overridden 'facter fqdn' with a 'FACTER_fqdn'
    environment variable (see Bigtop.check_localdomain).

    :returns: Sensible hostname (true FQDN or FACTER_fqdn from /etc/environment)
    """
    hostname = utils.run_as('ubuntu',
                            'facter', 'fqdn', capture_output=True)
    return hostname.strip()
    def check_hdfs_setup(self):
        """
        Check if the initial setup has been done in HDFS.

        This currently consists of initializing the /user/ubuntu directory.
        """
        try:
            output = utils.run_as('hdfs',
                                  'hdfs', 'dfs', '-stat', '%u', '/user/ubuntu',
                                  capture_output=True)
            return output.strip() == 'ubuntu'
        except subprocess.CalledProcessError:
            return False
Example #45
    def install(self):
        '''
        Perform initial one-time setup, work around upstream bugs, and
        trigger puppet.
        '''
        # Dirs are handled by the bigtop deb, so no need to call out to
        # dist_config to do that work.  However, we want to adjust the
        # groups for the `ubuntu` user for better interaction with Juju.
        self.dist_config.add_users()

        # Set ports based on layer.yaml options
        self._add_override('zeppelin::server::server_port',
                           self.dist_config.port('zeppelin'))
        self._add_override('zeppelin::server::web_socket_port',
                           self.dist_config.port('zeppelin_websocket'))

        # Default spark to local mode on initial install. This will be
        # reconfigured if/when hadoop or spark relations are made.
        local_master = 'local[*]'
        self._add_override('zeppelin::server::spark_master_url', local_master)

        # The spark-client role expects hdfs by default. Since we want to
        # keep Hadoop optional, ensure we remove hadoopy bits from our
        # local spark config. This has no effect if/when a remote spark joins,
        # and since there is no spark history server running, the event dirs
        # are not important -- they just need not be 'hdfs:///blah'.
        events_log_dir = 'file:///tmp'
        self._add_override('spark::common::master_url', local_master)
        self._add_override('spark::common::event_log_dir', events_log_dir)
        self._add_override('spark::common::history_log_dir', events_log_dir)

        ##########
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf', zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ##########

        self.trigger_bigtop()

        ##########
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(
            zepp_init_script, {
                r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
            })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
    def check_hdfs_setup(self):
        """
        Check if the initial setup has been done in HDFS.

        This currently consists of initializing the /user/ubuntu directory.
        """
        try:
            output = utils.run_as('hdfs',
                                  'hdfs', 'dfs', '-stat', '%u', '/user/ubuntu',
                                  capture_output=True)
            return output.strip() == 'ubuntu'
        except subprocess.CalledProcessError:
            return False
    def configure_hive(self, mysql):
        config = hookenv.config()
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
                mysql.host(), mysql.port(), mysql.database()
            )
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
            props['hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HADOOP_HEAPSIZE *=.*': 'export HADOOP_HEAPSIZE=%s' % config['heap'],
            r'.*export HIVE_AUX_JARS_PATH *=.*': 'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
        })

        # Now that we have db connection info, init our schema (only once)
        if not unitdata.kv().get('hive.schema.initialized'):
            utils.run_as('hive', 'schematool', '-initSchema', '-dbType', 'mysql')
            unitdata.kv().set('hive.schema.initialized', True)
def get_fqdn():
    """
    Return the FQDN as known by 'facter'.

    This must be run with utils.run_as to ensure any /etc/environment changes
    are honored. We may have overridden 'facter fqdn' with a 'FACTER_fqdn'
    environment variable (see Bigtop.check_localdomain).

    :returns: Sensible hostname (true FQDN or FACTER_fqdn from /etc/environment)
    """
    hostname = utils.run_as('ubuntu',
                            'facter', 'fqdn', capture_output=True)
    return hostname.strip()
Example #49
    def install(self):
        '''
        Perform initial one-time setup, work around upstream bugs, and
        trigger puppet.
        '''
        # Dirs are handled by the bigtop deb, so no need to call out to
        # dist_config to do that work.  However, we want to adjust the
        # groups for the `ubuntu` user for better interaction with Juju.
        self.dist_config.add_users()

        # Set ports based on layer.yaml options
        self._add_override('zeppelin::server::server_port',
                           self.dist_config.port('zeppelin'))
        self._add_override('zeppelin::server::web_socket_port',
                           self.dist_config.port('zeppelin_websocket'))

        # Default spark to local mode on initial install. This will be
        # reconfigured if/when hadoop or spark relations are made.
        local_master = 'local[*]'
        self._add_override('zeppelin::server::spark_master_url', local_master)

        # The spark-client role expects hdfs by default. Since we want to
        # keep Hadoop optional, ensure we remove hadoopy bits from our
        # local spark config. This has no effect if/when a remote spark joins,
        # and since there is no spark history server running, the event dirs
        # are not important -- they just need not be 'hdfs:///blah'.
        events_log_dir = 'file:///tmp'
        self._add_override('spark::common::master_url', local_master)
        self._add_override('spark::common::event_log_dir', events_log_dir)
        self._add_override('spark::common::history_log_dir', events_log_dir)

        ##########
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf',
                     zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ##########

        self.trigger_bigtop()

        ##########
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
 def start(self):
     flink_home = self.dist_config.path('flink')
     self.stop()
     containers = hookenv.config()['containers']
     containermem = hookenv.config()['containermem']
     start_flink = utils.run_as('flink', '{}/bin/yarn-session.sh'.format(flink_home),
                  'start', '-n', containers, '-tm', containermem, '-d',
                  capture_output=True)
     for line in start_flink.splitlines():
         print(line)
         if 'kill' in line:
             flink_appID = line.split(" ")[3]
     unitdata.kv().set('flink.ID', flink_appID)
    def install(self, force=False):
        if not force and self.is_installed():
            return

        # download Bigtop release; unpack the recipes
        bigtop_dir = '/home/ubuntu/bigtop.release'
        if not unitdata.kv().get('bigtop-release.installed', False):
            Path(bigtop_dir).rmtree_p()
            au = ArchiveUrlFetchHandler()
            au.install(bigtop_dir, '/home/ubuntu')

            unitdata.kv().set('bigtop-release.installed', True)
            unitdata.kv().flush(True)

        hiera_dst = self.dist_config.bigtop_hiera('path')
        hiera_conf = self.dist_config.bigtop_hiera('source')
        utils.re_edit_in_place(hiera_conf, {
            r'.*:datadir.*': '{0}/{1}'.format(hiera_dst, hiera_conf),
        })

        # generate site.yaml. Something like this would do
        setup_bigtop_config(bigtop_dir, "{0}/hieradata/site.yaml".format(os.path.dirname(hiera_conf)))

        # install required puppet modules
        try:
            utils.run_as('root', 'puppet', 'module', 'install', 'puppetlabs-stdlib', 'puppetlabs-apt')
        except CalledProcessError:
            pass # All modules are set

        try:
            utils.run_as('root', 'puppet', 'apply', '-d',
                         '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                         'bigtop-deploy/puppet/manifests/site.pp')
        except CalledProcessError:
            pass  # Everything seems to be fine

        unitdata.kv().set('bigtop.installed', True)
        unitdata.kv().flush(True)
Example #52
    def setup_init_script(self, user, servicename):
        daemon = "yarn"
        if user == "hdfs":
            daemon = "hadoop"
        elif user == "mapred":
            daemon = "mr-jobhistory"

        template_name = 'templates/upstart.conf'
        target_template_path = '/etc/init/{}.conf'.format(servicename)
        if host.init_is_systemd():
            template_name = 'templates/systemd.conf'
            target_template_path = '/etc/systemd/system/{}.service'.format(
                servicename)

        d = os.path.dirname(sys.modules['jujubigdata'].__file__)
        source_template_path = os.path.join(d, template_name)

        if os.path.exists(target_template_path):
            os.remove(target_template_path)

        render(
            source_template_path,
            target_template_path,
            templates_dir="/",
            context={
                'service': servicename,
                'user': user,
                'hadoop_path': self.dist_config.path('hadoop'),
                'hadoop_conf': self.dist_config.path('hadoop_conf'),
                'daemon': daemon,
            },
        )
        if host.init_is_systemd():
            utils.run_as('root', 'systemctl', 'enable',
                         '{}.service'.format(servicename))

        if host.init_is_systemd():
            utils.run_as('root', 'systemctl', 'daemon-reload')
    def setup_init_scripts(self):
        templates_list = ['history', 'master', 'slave']
        for template in templates_list:
            if host.init_is_systemd():
                template_path = '/etc/systemd/system/spark-{}.service'.format(template)
            else:
                template_path = '/etc/init/spark-{}.conf'.format(template)
            if os.path.exists(template_path):
                os.remove(template_path)

        self.stop()

        mode = hookenv.config()['spark_execution_mode']
        templates_list = ['history']
        if mode == 'standalone':
            templates_list.append('master')
            templates_list.append('slave')

        for template in templates_list:
            template_name = '{}-upstart.conf'.format(template)
            template_path = '/etc/init/spark-{}.conf'.format(template)
            if host.init_is_systemd():
                template_name = '{}-systemd.conf'.format(template)
                template_path = '/etc/systemd/system/spark-{}.service'.format(template)

            render(
                template_name,
                template_path,
                context={
                    'spark_bin': self.dist_config.path('spark'),
                    'master': self.get_master()
                },
            )
            if host.init_is_systemd():
                utils.run_as('root', 'systemctl', 'enable', 'spark-{}.service'.format(template))

        if host.init_is_systemd():
            utils.run_as('root', 'systemctl', 'daemon-reload')
Example #54
 def stop(self):
     spark_home = self.dist_config.path('spark')
     # Only stop services if they're running
     if utils.jps("HistoryServer"):
         utils.run_as('ubuntu', '{}/sbin/stop-history-server.sh'.format(spark_home))
     if utils.jps("Master"):
         utils.run_as('ubuntu', '{}/sbin/stop-master.sh'.format(spark_home))
     if utils.jps("Worker"):
         utils.run_as('ubuntu', '{}/sbin/stop-slave.sh'.format(spark_home))
Example #55
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        bigtop.render_site_yaml(
            roles=[
                'spark-client',
                'spark-yarn-slave',
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        if unitdata.kv().get('zeppelin.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            ####################################################################
            # BUG: BIGTOP-2742
            # Default zeppelin init script looks for the literal '$(hostname)'
            # string. Symlink it so it exists before the apt install from puppet
            # tries to start the service.
            import subprocess
            host = subprocess.check_output(['hostname']).decode('utf8').strip()
            zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
            utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
            utils.run_as('root', 'ln', '-sf',
                         zepp_pid,
                         '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
            ####################################################################

            bigtop.trigger_puppet()
            self.wait_for_api(30)

            ####################################################################
            # BUG: BIGTOP-2742
            # Puppet apply will call systemctl daemon-reload, which removes the
            # symlink we just created. Now that the bits are on disk, update the
            # init script $(hostname) that caused this mess to begin with.
            zepp_init_script = '/etc/init.d/zeppelin'
            utils.re_edit_in_place(zepp_init_script, {
                r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
            })
            utils.run_as('root', 'systemctl', 'daemon-reload')
            self.restart()
            self.wait_for_api(30)
Example #56
    def setup_hive_config(self):
        '''
        copy the default configuration files to hive_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('hive') / 'conf'
        hive_conf = self.dist_config.path('hive_conf')
        hive_conf.rmtree_p()
        default_conf.copytree(hive_conf)

        # Configure immutable bits
        hive_bin = self.dist_config.path('hive') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hive_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hive_bin])
            env['HIVE_CONF_DIR'] = self.dist_config.path('hive_conf')

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') /
             'hive-env.sh.template').copy(hive_env)

        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        if not hive_site.exists():
            (self.dist_config.path('hive_conf') /
             'hive-default.xml.template').copy(hive_site)
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            # TODO (kwm): we should be able to export java.io.tmpdir so these 4 aren't needed
            props['hive.exec.local.scratchdir'] = "/tmp/hive"
            props['hive.downloaded.resources.dir'] = "/tmp/hive_resources"
            props['hive.querylog.location'] = "/tmp/hive"
            props['hive.server2.logging.operation.log.location'] = "/tmp/hive"
            ####

        # create hdfs storage space
        utils.run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/hive/warehouse')