Example 1
def install_java():
    """Install java just like we do for Hadoop Base.

    This is the same method used to install java in HadoopBase:
    https://github.com/juju-solutions/jujubigdata/blob/master/jujubigdata/handlers.py#L134

    This allows us to run Pig in local mode (which requires Java) without
    any Hadoop. If Hadoop comes along later, we'll already have java installed
    in a way that is compatible with the plugin.

    NOTE: this will go away if/when we support the java interface.
    """
    env = utils.read_etc_env()
    java_installer = Path(jujuresources.resource_path('java-installer'))
    java_installer.chmod(0o755)
    output = check_output([java_installer], env=env).decode('utf8')
    lines = output.strip().splitlines()
    if len(lines) != 2:
        raise ValueError('Unexpected output from java-installer: %s' % output)
    java_home, java_version = lines
    if '_' in java_version:
        java_major, java_release = java_version.split("_")
    else:
        java_major, java_release = java_version, ''
    unitdata.kv().set('java.home', java_home)
    unitdata.kv().set('java.version', java_major)
    unitdata.kv().set('java.version.release', java_release)
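The version handling above is easiest to see with concrete values. A minimal standalone sketch, using hypothetical installer output (the JAVA_HOME path and version string are made up for illustration):

# Hypothetical two-line installer output: JAVA_HOME, then the Java version.
output = '/usr/lib/jvm/java-8-openjdk-amd64\n1.8.0_131\n'
java_home, java_version = output.strip().splitlines()
if '_' in java_version:
    # '1.8.0_131' -> major version '1.8.0', release '131'
    java_major, java_release = java_version.split('_')
else:
    # a version with no update suffix (e.g. '9') has an empty release
    java_major, java_release = java_version, ''
print(java_home, java_major, java_release)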
Example 2
    def reconfigure_zeppelin(self):
        '''
        Configure zeppelin based on current environment
        '''
        raise NotImplementedError()
        # NB (kwm): this method is not currently called because Bigtop spark
        # doesn't expose these settings. Leaving this here just in case
        # we update the bigtop charms to provide these bits in the future.
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
        zeppelin_env = self.dist_config.path(
            'zeppelin_conf') / 'zeppelin-env.sh'
        with open(zeppelin_env, "a") as f:
            f.write('export ZEPPELIN_CLASSPATH_OVERRIDES={}\n'.format(
                hadoop_extra_classpath))
            f.write(
                'export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'
                .format(spark_driver_mem, spark_executor_mem))
            f.write(
                'export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'
                .format(spark_driver_mem, spark_executor_mem))
            f.write('export MASTER={}\n'.format(spark_exe_mode))
Example 3
    def install_java(self):
        """
        Run the java-installer resource to install Java and determine
        the JAVA_HOME and Java version.

        The java-installer must be idempotent and its only output (on stdout)
        should be two lines: the JAVA_HOME path, and the Java version, respectively.

        If there is an error installing Java, the installer should exit
        with a non-zero exit code.
        """
        env = utils.read_etc_env()
        java_installer = Path(jujuresources.resource_path('java-installer'))
        java_installer.chmod(0o755)
        output = check_output([java_installer], env=env).decode('utf8')
        lines = output.strip().splitlines()
        if len(lines) != 2:
            raise ValueError('Unexpected output from java-installer: %s' %
                             output)
        java_home, java_version = lines
        if '_' in java_version:
            java_major, java_release = java_version.split("_")
        else:
            java_major, java_release = java_version, ''
        unitdata.kv().set('java.home', java_home)
        unitdata.kv().set('java.version', java_major)
        unitdata.kv().set('java.version.release', java_release)
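The docstring above pins down the installer contract: idempotent, exactly two lines on stdout (JAVA_HOME, then the version), and a non-zero exit code on failure. Below is a sketch of a minimal installer that would satisfy that contract, assuming an apt-based system and OpenJDK 8; the actual java-installer resource shipped with the charm may look quite different.

#!/usr/bin/env python3
# Hypothetical java-installer: idempotent, prints exactly two lines on
# stdout, exits non-zero if installation fails.
import subprocess
import sys

try:
    # no-op if the package is already installed, so the script stays idempotent
    subprocess.check_call(
        ['apt-get', 'install', '-qy', 'openjdk-8-jre-headless'],
        stdout=subprocess.DEVNULL)
    # `java -version` writes its banner to stderr, e.g. openjdk version "1.8.0_131"
    banner = subprocess.check_output(
        ['java', '-version'], stderr=subprocess.STDOUT).decode('utf8')
    print('/usr/lib/jvm/java-8-openjdk-amd64')   # assumed JAVA_HOME for this package
    print(banner.split('"')[1])                  # e.g. 1.8.0_131
except (subprocess.CalledProcessError, IndexError):
    sys.exit(1)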
Example 4
    def install_java(self):
        """
        Run the java-installer resource to install Java and determine
        the JAVA_HOME and Java version.

        The java-installer must be idempotent and its only output (on stdout)
        should be two lines: the JAVA_HOME path, and the Java version, respectively.

        If there is an error installing Java, the installer should exit
        with a non-zero exit code.
        """
        env = utils.read_etc_env()
        java_installer = Path(jujuresources.resource_path('java-installer'))
        java_installer.chmod(0o755)
        output = check_output([java_installer], env=env).decode('utf8')
        lines = output.strip().splitlines()
        if len(lines) != 2:
            raise ValueError('Unexpected output from java-installer: %s' % output)
        java_home, java_version = lines
        if '_' in java_version:
            java_major, java_release = java_version.split("_")
        else:
            java_major, java_release = java_version, ''
        unitdata.kv().set('java.home', java_home)
        unitdata.kv().set('java.version', java_major)
        unitdata.kv().set('java.version.release', java_release)
Example 5
def install_java():
    """Install java just like we do for Hadoop Base.

    This is the same method used to install java in HadoopBase:
    https://github.com/juju-solutions/jujubigdata/blob/master/jujubigdata/handlers.py#L134

    This allows us to run Pig in local mode (which requires Java) without
    any Hadoop. If Hadoop comes along later, we'll already have java installed
    in a way that is compatible with the plugin.

    NOTE: this will go away if/when we support the java interface.
    """
    env = utils.read_etc_env()
    java_installer = Path(jujuresources.resource_path('java-installer'))
    java_installer.chmod(0o755)
    output = check_output([java_installer], env=env).decode('utf8')
    lines = output.strip().splitlines()
    if len(lines) != 2:
        raise ValueError('Unexpected output from java-installer: %s' % output)
    java_home, java_version = lines
    if '_' in java_version:
        java_major, java_release = java_version.split("_")
    else:
        java_major, java_release = java_version, ''
    unitdata.kv().set('java.home', java_home)
    unitdata.kv().set('java.version', java_major)
    unitdata.kv().set('java.version.release', java_release)
Example 6
    def configure_zeppelin(self):
        '''
        Configure zeppelin environment for all users
        '''
        zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zeppelin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
            env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

        zeppelin_site = self.dist_config.path(
            'zeppelin_conf') / 'zeppelin-site.xml'
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
            xml['zeppelin.notebook.dir'] = self.dist_config.path(
                'zeppelin_notebooks')

        etc_env = utils.read_etc_env()
        hadoop_conf_dir = etc_env.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
        spark_home = etc_env.get('SPARK_HOME', '/usr/lib/spark')
        spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
        zeppelin_env = self.dist_config.path(
            'zeppelin_conf') / 'zeppelin-env.sh'
        with open(zeppelin_env, "a") as f:
            f.write('export ZEPPELIN_HOME={}\n'.format(
                self.dist_config.path('zeppelin')))
            f.write(
                'export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'
                .format(spark_driver_mem, spark_executor_mem))
            f.write('export ZEPPELIN_LOG_DIR={}\n'.format(
                self.dist_config.path('zeppelin_logs')))
            f.write(
                'export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n'
            )
            f.write('export ZEPPELIN_NOTEBOOK_DIR={}\n'.format(
                self.dist_config.path('zeppelin_notebooks')))
            f.write('export SPARK_HOME={}\n'.format(spark_home))
            f.write(
                'export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'
                .format(spark_driver_mem, spark_executor_mem))
            f.write('export HADOOP_CONF_DIR={}\n'.format(hadoop_conf_dir))
            f.write(
                'export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n'
                .format(s=spark_home))
            f.write('export MASTER={}\n'.format(spark_exe_mode))

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of having zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(
            self.dist_config.path('zeppelin_conf'))
        call(cmd.split())
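The PATH and ZEPPELIN_CONF_DIR handling above goes through utils.environment_edit_in_place, which exposes /etc/environment as a mutable mapping and persists any changes when the with-block exits. A rough sketch of that context-manager pattern (a simplified illustration, not the jujubigdata implementation):

import contextlib

@contextlib.contextmanager
def environment_edit_in_place(path='/etc/environment'):
    # Parse KEY="VALUE" lines into a dict, yield it for editing, then
    # rewrite the file with any changes. Quoting rules are simplified here.
    with open(path) as f:
        pairs = (line.rstrip('\n').split('=', 1) for line in f if '=' in line)
        env = {key: value.strip('"') for key, value in pairs}
    yield env
    with open(path, 'w') as f:
        for key, value in env.items():
            f.write('{}="{}"\n'.format(key, value))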
Example 7
    def run_bg(self, user, command, *args):
        """
        Run a Kafka command as the `kafka` user in the background.

        :param str user: User to run the command as
        :param str command: Command to run
        :param list args: Additional args to pass to the command
        """
        parts = [command] + list(args)
        quoted = ' '.join("'%s'" % p for p in parts)
        e = utils.read_etc_env()
        Popen(['su', user, '-c', quoted], env=e)
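The per-argument quoting above is what lets arbitrary arguments survive the trip through su -c. A quick standalone check of the command Popen ends up running, with a made-up Kafka invocation (script path and arguments are illustrative, not taken from the charm):

parts = ['/usr/lib/kafka/bin/kafka-topics.sh', '--list',
         '--zookeeper', 'localhost:2181']
quoted = ' '.join("'%s'" % p for p in parts)
print(['su', 'kafka', '-c', quoted])
# ['su', 'kafka', '-c', "'/usr/lib/kafka/bin/kafka-topics.sh' '--list' '--zookeeper' 'localhost:2181'"]

Because env=e is the mapping read from /etc/environment, variables such as JAVA_HOME written there by related charms are visible to the backgrounded command.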
Example 8
    def configure_yarn_mode(self):
        # put the spark jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
            self.dist_config.path('spark')))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                         '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            pass  # jar already in HDFS from another Spark

        with utils.environment_edit_in_place('/etc/environment') as env:
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # create hdfs storage space for history server
        dc = self.dist_config
        prefix = dc.path('log_prefix')
        events_dir = dc.path('spark_events')
        events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     events_dir)

        # create hdfs storage space for spark-bench
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/spark-bench')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     '/user/ubuntu/spark-bench')

        # ensure user-provided Hadoop works
        hadoop_classpath = utils.run_as('hdfs',
                                        'hadoop',
                                        'classpath',
                                        capture_output=True)
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DIST_CLASSPATH.*':
            'SPARK_DIST_CLASSPATH={}'.format(hadoop_classpath),
        },
                               append_non_matches=True)

        # update spark-defaults
        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*':
            'spark.master {}'.format(self.get_master()),
        },
                               append_non_matches=True)

        unitdata.kv().set('hdfs.available', True)
        unitdata.kv().flush(True)
Example 9
    def run_bg(self, user, command, *args):
        """
        Run a Hive command as the `hive` user in the background.

        :param str user: User to run the command as
        :param str command: Command to run
        :param list args: Additional args to pass to the command
        """
        parts = [command] + list(args)
        quoted = ' '.join("'%s'" % p for p in parts)
        e = utils.read_etc_env()
        Popen(['su', user, '-c', quoted], env=e)
Example 10
    def run_bg(self, user, output_log, command, *args):
        """
        Run a command as the given user in the background.

        :param str user: User to run the command as
        :param str output_log: File to redirect the command's output to
        :param str command: Command to run
        :param list args: Additional args to pass to the command
        """
        parts = [command] + list(args)
        quoted = ' '.join("'%s'" % p for p in parts)
        e = utils.read_etc_env()
        Popen(['su', user, '-c', '{} &> {} &'.format(quoted, output_log)], env=e)
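This variant needs a shell to perform the &> redirection and the trailing &, which is why the whole command line is handed to su -c as a single string (note that &> is a bash-ism). A standalone sketch of the string it builds, with a hypothetical Flume agent command and log path:

parts = ['flume-ng', 'agent', '-n', 'a1', '-f', '/etc/flume/conf/flume.conf']
quoted = ' '.join("'%s'" % p for p in parts)
print('{} &> {} &'.format(quoted, '/var/log/flume-a1.log'))
# 'flume-ng' 'agent' '-n' 'a1' '-f' '/etc/flume/conf/flume.conf' &> /var/log/flume-a1.log &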
Example 11
    def run_bg(self, user, output_log, command, *args):
        """
        Run a command as the given user in the background.

        :param str user: User to run the command as
        :param str output_log: File to redirect the command's output to
        :param str command: Command to run
        :param list args: Additional args to pass to the command
        """
        parts = [command] + list(args)
        quoted = ' '.join("'%s'" % p for p in parts)
        e = utils.read_etc_env()
        Popen(['su', user, '-c', '{} &> {} &'.format(quoted, output_log)],
              env=e)
Example 12
    def configure_hadoop_libs(self):
        if unitdata.kv().get('hadoop.extra.installed', False):
            return

        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.driver.extraClassPath .*': 'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
            r'.*spark.jars .*': 'spark.jars {}'.format(hadoop_extra_classpath),
        }, append_non_matches=True)

        unitdata.kv().set('hadoop.extra.installed', True)
        unitdata.kv().flush(True)
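Several of these examples lean on utils.re_edit_in_place(..., append_non_matches=True) to either rewrite a matching line or append the setting when it is absent. A rough standalone sketch of that edit-or-append pattern (just the idea, not the jujubigdata implementation):

import re

def edit_or_append(path, edits):
    """For each regex -> replacement pair, rewrite matching lines in the
    file, or append the replacement if nothing matched."""
    with open(path) as f:
        lines = f.read().splitlines()
    for pattern, replacement in edits.items():
        regex = re.compile(pattern)
        if any(regex.match(line) for line in lines):
            lines = [replacement if regex.match(line) else line
                     for line in lines]
        else:
            lines.append(replacement)
    with open(path, 'w') as f:
        f.write('\n'.join(lines) + '\n')

# e.g. edit_or_append('/etc/spark/conf/spark-defaults.conf',
#                     {r'.*spark.jars .*': 'spark.jars /tmp/extra.jar'})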
Example 13
    def run_bg(self, user, command, *args):
        """
        Start a Flume agent as the given user in the background.

        :param str user: User to run flume agent
        :param str command: Command to run
        :param list args: Additional args to pass to the command
        """
        parts = [command] + list(args)
        quoted = ' '.join("'%s'" % p for p in parts)
        # Force execution in the background and silence output; too much output causes Popen to fail.
        silent = ' '.join([quoted, "2>", "/dev/null", "&"])
        e = utils.read_etc_env()
        Popen(['su', user, '-c', silent], env=e)
Example 14
    def configure_yarn_mode(self):
        # put the spark jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
                                  self.dist_config.path('spark')))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                         '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            pass  # jar already in HDFS from another Spark

        with utils.environment_edit_in_place('/etc/environment') as env:
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # create hdfs storage space for history server
        dc = self.dist_config
        prefix = dc.path('log_prefix')
        events_dir = dc.path('spark_events')
        events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     events_dir)

        # create hdfs storage space for spark-bench
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/spark-bench')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     '/user/ubuntu/spark-bench')

        # ensure user-provided Hadoop works
        hadoop_classpath = utils.run_as('hdfs', 'hadoop', 'classpath',
                                        capture_output=True)
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DIST_CLASSPATH.*': 'SPARK_DIST_CLASSPATH={}'.format(hadoop_classpath),
        }, append_non_matches=True)

        # update spark-defaults
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
            r'.*spark.driver.extraClassPath .*': 'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
        }, append_non_matches=True)

        unitdata.kv().set('hdfs.available', True)
        unitdata.kv().flush(True)
Example 15
    def configure_zeppelin(self):
        """
        Configure zeppelin environment for all users
        """
        zeppelin_bin = self.dist_config.path("zeppelin") / "bin"
        with utils.environment_edit_in_place("/etc/environment") as env:
            if zeppelin_bin not in env["PATH"]:
                env["PATH"] = ":".join([env["PATH"], zeppelin_bin])
            env["ZEPPELIN_CONF_DIR"] = self.dist_config.path("zeppelin_conf")

        zeppelin_site = self.dist_config.path("zeppelin_conf") / "zeppelin-site.xml"
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml["zeppelin.server.port"] = self.dist_config.port("zeppelin")
            xml["zeppelin.notebook.dir"] = self.dist_config.path("zeppelin_notebooks")

        etc_env = utils.read_etc_env()
        hadoop_conf_dir = etc_env.get("HADOOP_CONF_DIR", "/etc/hadoop/conf")
        spark_home = etc_env.get("SPARK_HOME", "/usr/lib/spark")
        spark_driver_mem = etc_env.get("SPARK_DRIVER_MEMORY", "1g")
        spark_exe_mode = os.environ.get("MASTER", "yarn-client")
        spark_executor_mem = etc_env.get("SPARK_EXECUTOR_MEMORY", "1g")
        zeppelin_env = self.dist_config.path("zeppelin_conf") / "zeppelin-env.sh"
        with open(zeppelin_env, "a") as f:
            f.write("export ZEPPELIN_HOME={}\n".format(self.dist_config.path("zeppelin")))
            f.write(
                'export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
                    spark_driver_mem, spark_executor_mem
                )
            )
            f.write("export ZEPPELIN_LOG_DIR={}\n".format(self.dist_config.path("zeppelin_logs")))
            f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
            f.write("export ZEPPELIN_NOTEBOOK_DIR={}\n".format(self.dist_config.path("zeppelin_notebooks")))
            f.write("export SPARK_HOME={}\n".format(spark_home))
            f.write(
                'export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
                    spark_driver_mem, spark_executor_mem
                )
            )
            f.write("export HADOOP_CONF_DIR={}\n".format(hadoop_conf_dir))
            f.write("export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n".format(s=spark_home))
            f.write("export MASTER={}\n".format(spark_exe_mode))

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of having zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path("zeppelin_conf"))
        call(cmd.split())
Example 16
    def configure_yarn_mode(self):
        # put the spark jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
            self.dist_config.path('spark')))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                         '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            pass  # jar already in HDFS from another Spark

        with utils.environment_edit_in_place('/etc/environment') as env:
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # create hdfs storage space for history server
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     self.dist_config.path('spark_events'))
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     self.dist_config.path('spark_events'))

        # create hdfs storage space for spark-bench
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/spark-bench')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     '/user/ubuntu/spark-bench')

        # update spark-defaults
        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*':
            'spark.master {}'.format(self.get_master()),
            r'.*spark.eventLog.enabled .*':
            'spark.eventLog.enabled true',
            r'.*spark.eventLog.dir .*':
            'spark.eventLog.dir hdfs://{}'.format(
                self.dist_config.path('spark_events')),
            r'.*spark.driver.extraClassPath .*':
            'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
        },
                               append_non_matches=True)

        unitdata.kv().set('hdfs.available', True)
        unitdata.kv().flush(True)
Example 17
    def configure_zeppelin(self):
        '''
        Configure zeppelin environment for all users
        '''
        zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zeppelin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
            env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

        zeppelin_site = self.dist_config.path('zeppelin_conf') / 'zeppelin-site.xml'
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
            xml['zeppelin.notebook.dir'] = self.dist_config.path('zeppelin_notebooks')

        etc_env = utils.read_etc_env()
        hadoop_conf_dir = etc_env.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        spark_home = etc_env.get('SPARK_HOME', '/usr/lib/spark')
        spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
        zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
        with open(zeppelin_env, "a") as f:
            f.write('export ZEPPELIN_CLASSPATH_OVERRIDES={}\n'.format(hadoop_extra_classpath))
            f.write('export ZEPPELIN_HOME={}\n'.format(self.dist_config.path('zeppelin')))
            f.write('export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
                spark_driver_mem,
                spark_executor_mem))
            f.write('export ZEPPELIN_LOG_DIR={}\n'.format(self.dist_config.path('zeppelin_logs')))
            f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
            f.write('export ZEPPELIN_NOTEBOOK_DIR={}\n'.format(self.dist_config.path('zeppelin_notebooks')))
            f.write('export SPARK_HOME={}\n'.format(spark_home))
            f.write('export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
                spark_driver_mem,
                spark_executor_mem))
            f.write('export HADOOP_CONF_DIR={}\n'.format(hadoop_conf_dir))
            f.write('export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n'.format(s=spark_home))
            f.write('export MASTER={}\n'.format(spark_exe_mode))

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of having zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path('zeppelin_conf'))
        call(cmd.split())
Example 18
    def configure_hadoop_libs(self):
        if unitdata.kv().get('hadoop.extra.installed', False):
            return

        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.driver.extraClassPath .*':
            'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
            r'.*spark.jars .*':
            'spark.jars {}'.format(hadoop_extra_classpath),
        },
                               append_non_matches=True)

        unitdata.kv().set('hadoop.extra.installed', True)
        unitdata.kv().flush(True)
Example 19
    def reconfigure_zeppelin(self):
        '''
        Configure zeppelin based on current environment
        '''
        raise NotImplementedError()
        # NB (kwm): this method is not currently called because Bigtop spark
        # doesn't expose these settings. Leaving this here just in case
        # we update the bigtop charms to provide these bits in the future.
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
        zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
        with open(zeppelin_env, "a") as f:
            f.write('export ZEPPELIN_CLASSPATH_OVERRIDES={}\n'.format(hadoop_extra_classpath))
            f.write('export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
                spark_driver_mem,
                spark_executor_mem))
            f.write('export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
                spark_driver_mem,
                spark_executor_mem))
            f.write('export MASTER={}\n'.format(spark_exe_mode))
Example 20
    def configure_oozie_hdfs(self):
        # config = hookenv.config()
        e = utils.read_etc_env()
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/oozie', env=e)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'oozie:hadoop', '/user/oozie', env=e)
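For reference, a rough standalone equivalent of those two calls using subprocess and su directly, assuming utils here is jujubigdata.utils as in the other examples; the real utils.run_as helper also handles quoting and error reporting, so treat this as a sketch only:

import subprocess

from jujubigdata import utils

env = utils.read_etc_env()   # variables from /etc/environment
for cmd in ('hdfs dfs -mkdir -p /user/oozie',
            'hdfs dfs -chown -R oozie:hadoop /user/oozie'):
    # run each HDFS command as the hdfs user, with that environment exported
    subprocess.check_call(['su', 'hdfs', '-c', cmd], env=env)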