Example #1
def zookeeper_changed(zookeeper):
    hookenv.log('Checking if Zookeeper has changed')
    zookeeper_servers_string = ''
    filesdir = '{}/files'.format(hookenv.charm_dir())
    for zk_unit in zookeeper.zookeepers():
        zookeeper_servers_string += '{}:{},'.format(zk_unit['host'],
                                                    zk_unit['port'])
    if zookeeper_servers_string[:-1] not in open(
            '{}/nifi-1.3.0/conf/nifi.properties'.format(filesdir)).read():
        hookenv.status_set(
            'maintenance',
            'Zookeeper has changed. Updating Apache NiFi settings and restarting'
        )
        re_edit_in_place(
            '{}/nifi-1.3.0/conf/nifi.properties'.format(filesdir), {
                r'.*nifi.zookeeper.connect.string.*':
                'nifi.zookeeper.connect.string={}'.format(
                    zookeeper_servers_string[:-1])
            })
        try:
            subprocess.check_call([
                'bash', '{}/nifi-1.3.0/bin/nifi.sh'.format(filesdir), 'restart'
            ])
            hookenv.status_set('active',
                               'Running: cluster mode with Zookeeper')
            set_state('apache-nifi.cluster')
        except subprocess.CalledProcessError:
            hookenv.status_set('blocked', 'Failed to restart')
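
Every example in this collection edits configuration files through a re_edit_in_place helper from the Juju big-data charm utilities. As a rough mental model only, here is a minimal sketch of such a helper, assuming it applies each {regex: replacement} pair line by line and rewrites the file in place; the real helper's details (encoding, partial-line substitution, exact append_non_matches semantics) may differ.

import re

def re_edit_in_place(filename, subs, append_non_matches=False):
    """Minimal illustrative sketch, not the charm helpers' implementation.

    Apply each {pattern: replacement} regex substitution to every line of
    ``filename`` and write the result back in place. With
    ``append_non_matches=True``, patterns that matched no line have their
    replacement text appended to the end of the file.
    """
    with open(filename) as f:
        lines = f.read().splitlines()

    matched = set()
    new_lines = []
    for line in lines:
        for pattern, replacement in subs.items():
            if re.search(pattern, line):
                line = re.sub(pattern, replacement, line)
                matched.add(pattern)
        new_lines.append(line)

    if append_non_matches:
        new_lines.extend(replacement for pattern, replacement in subs.items()
                         if pattern not in matched)

    with open(filename, 'w') as f:
        f.write('\n'.join(new_lines) + '\n')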
Example #2
    def update_apps(self):
        # Add all services disabled unless we have a joined relation
        # as marked by the respective state
        # Enabled by default: 'filebrowser', 'jobbrowser'
        disabled_services = [
            'beeswax', 'impala', 'security',
            'rdbms', 'jobsub', 'pig', 'hbase', 'sqoop',
            'zookeeper', 'metastore', 'spark', 'oozie', 'indexer', 'search']

        for key in get_states():
            if "joined" in key:
                relname = key.split('.')[0]
                if 'hive' in relname:
                    disabled_services.remove('beeswax')
                    disabled_services.remove('metastore')
                if 'spark' in relname:
                    disabled_services.remove('spark')
                if 'oozie' in relname:
                    disabled_services.remove('oozie')
                if 'zookeeper' in relname:
                    disabled_services.remove('zookeeper')

        hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
        services_string = ','.join(disabled_services)
        hookenv.log("Disabled apps {}".format(services_string))
        utils.re_edit_in_place(hue_config, {
            r'.*app_blacklist=.*': ''.join(('app_blacklist=', services_string))
            })

        self.check_relations()
Example #3
 def configure_spark(self, hostname, port):
     #hookenv.log("configuring spark connection via livy")
     hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
     utils.re_edit_in_place(hue_config, {
         r'.*livy_server_host *=.*': 'livy_server_host=%s' % hostname,
         r'.*livy_server_port *=.*': 'livy_server_port=%s' % port
         })  
Example #4
    def setup_spark_config(self):
        '''
        copy the default configuration files to spark_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('spark') / 'conf'
        spark_conf = self.dist_config.path('spark_conf')
        spark_conf.rmtree_p()
        default_conf.copytree(spark_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        spark_conf.symlink(default_conf)

        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        if not spark_env.exists():
            (self.dist_config.path('spark_conf') / 'spark-env.sh.template').copy(spark_env)
        spark_default = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        if not spark_default.exists():
            (self.dist_config.path('spark_conf') / 'spark-defaults.conf.template').copy(spark_default)
        spark_log4j = self.dist_config.path('spark_conf') / 'log4j.properties'
        if not spark_log4j.exists():
            (self.dist_config.path('spark_conf') / 'log4j.properties.template').copy(spark_log4j)
        utils.re_edit_in_place(spark_log4j, {
            r'log4j.rootCategory=INFO, console': 'log4j.rootCategory=ERROR, console',
        })
Example #5
    def configure_kafka(self):
        # Get ip:port data from our connected zookeepers
        if Zookeeper().connected_units() and Zookeeper().is_ready():
            zks = []
            for unit, data in Zookeeper().filtered_data().items():
                ip = utils.resolve_private_address(data['private-address'])
                zks.append("%s:%s" % (ip, data['port']))
            zks.sort()
            zk_connect = ",".join(zks)

            # update consumer props
            cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })

            # update server props
            cfg = self.dist_config.path('kafka_conf') / 'server.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })
        else:
            # if we have no zookeepers, make sure kafka is stopped
            self.stop()
Example #6
    def setup_spark_config(self):
        '''
        copy the default configuration files to spark_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('spark') / 'conf'
        spark_conf = self.dist_config.path('spark_conf')
        spark_conf.rmtree_p()
        default_conf.copytree(spark_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        spark_conf.symlink(default_conf)

        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        if not spark_env.exists():
            (self.dist_config.path('spark_conf') /
             'spark-env.sh.template').copy(spark_env)
        spark_default = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        if not spark_default.exists():
            (self.dist_config.path('spark_conf') /
             'spark-defaults.conf.template').copy(spark_default)
        spark_log4j = self.dist_config.path('spark_conf') / 'log4j.properties'
        if not spark_log4j.exists():
            (self.dist_config.path('spark_conf') /
             'log4j.properties.template').copy(spark_log4j)
        utils.re_edit_in_place(
            spark_log4j, {
                r'log4j.rootCategory=INFO, console':
                'log4j.rootCategory=ERROR, console',
            })
Example #7
    def configure_remote_db(self, mysql):
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        jdbc_url = \
            "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
                mysql.host(), mysql.port(), mysql.database()
            )
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = jdbc_url
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = \
                "com.mysql.jdbc.Driver"

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
        })

        # Now that we have db connection info, init our schema (only once)
        remote_db = hookenv.remote_service_name()
        if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
            tool_path = "{}/bin/schematool".format(
                self.dist_config.path('hive'))
            utils.run_as(
                'ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
            unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
            unitdata.kv().flush(True)
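
The example above also uses an xmlpropmap_edit_in_place context manager to edit Hadoop-style *-site.xml property files. A minimal sketch, assuming it exposes the <property> name/value pairs as a dict and writes changes back on exit; the real helper may preserve comments, descriptions, and formatting that this sketch discards.

import contextlib
import xml.etree.ElementTree as ET

@contextlib.contextmanager
def xmlpropmap_edit_in_place(path):
    """Illustrative sketch of a Hadoop *-site.xml property editor."""
    tree = ET.parse(path)
    root = tree.getroot()
    # Map <property><name>/<value> pairs into a plain dict.
    props = {p.find('name').text: p.find('value').text
             for p in root.findall('property')}
    original = dict(props)
    yield props
    if props != original:
        # Rebuild the <property> elements from the edited dict.
        for p in list(root.findall('property')):
            root.remove(p)
        for name, value in props.items():
            prop = ET.SubElement(root, 'property')
            ET.SubElement(prop, 'name').text = name
            ET.SubElement(prop, 'value').text = str(value)
        tree.write(path)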
Example #8
    def configure(self):
        '''
        Configure spark environment for all users
        '''
        spark_home = self.dist_config.path('spark')
        spark_bin = spark_home / 'bin'

        # put our jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(spark_home))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar, '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            print ("File exists")

        # update environment variables
        with utils.environment_edit_in_place('/etc/environment') as env:
            if spark_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], spark_bin])
            env['MASTER'] = hookenv.config('spark_execution_mode')
            env['PYSPARK_DRIVER_PYTHON'] = "ipython"
            env['SPARK_CONF_DIR'] = self.dist_config.path('spark_conf')
            env['SPARK_HOME'] = spark_home
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # update spark config
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.eventLog.enabled *.*': 'spark.eventLog.enabled    true',
            r'.*spark.eventLog.dir *.*': 'spark.eventLog.dir    hdfs:///user/ubuntu/directory',
            })
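
Several examples also edit /etc/environment through an environment_edit_in_place context manager. A minimal sketch, assuming the file holds simple KEY="value" lines and that changes are written back when the with-block exits; quoting and ordering in the real helper may differ.

import contextlib

@contextlib.contextmanager
def environment_edit_in_place(path='/etc/environment'):
    """Illustrative sketch: edit /etc/environment as a dict of KEY=value."""
    entries = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
                key, _, value = line.partition('=')
                entries[key] = value.strip('"')
    yield entries
    with open(path, 'w') as f:
        for key, value in entries.items():
            # format() stringifies path-like values (e.g. dist_config paths).
            f.write('{}="{}"\n'.format(key, value))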
Example #9
    def setup_gobblin(self, host, port):
        '''
        Configure Gobblin. Each time something changes (e.g. a new Hadoop
        endpoint is present), this method must be called.

        :param str host: IP or hostname of the HDFS endpoint.
        :param str port: Port of the HDFS endpoint.
        '''

        # Setup the environment
        gobblin_bin = self.dist_config.path('gobblin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if gobblin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], gobblin_bin])
            env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
            env['GOBBLIN_WORK_DIR'] = "/user/gobblin/work"

        hdfs_endpoint = ''.join([host, ':', port])

        # Setup gobblin configuration
        conf_dir = self.dist_config.path('gobblin') / 'conf'
        gobblin_config_template = conf_dir / 'gobblin-mapreduce.properties.template'
        gobblin_config = conf_dir / 'gobblin-mapreduce.properties'
        try:
            copy(gobblin_config_template, gobblin_config)
        except FileNotFoundError:
            pass

        utils.re_edit_in_place(gobblin_config, {
            r'fs.uri=hdfs://localhost:8020': 'fs.uri=hdfs://%s' % hdfs_endpoint,
        })

        if '2.7.2' in self.hadoop_version:
            utils.re_edit_in_place(gobblin_config, {
                r'task.data.root.dir=*': 'task.data.root.dir=${env:GOBBLIN_WORK_DIR}/task'
            }, append_non_matches=True)
Example #10
def zookeeper_config(zookeeper):
    hookenv.status_set('maintenance',
                       'Changing Apache NiFi to run as a cluster')
    hookenv.log(
        'Adding Apache Zookeeper -- Changing Apache NiFi to run as a cluster')
    conf = hookenv.config()
    zookeeper_servers_string = ''
    for zk_unit in zookeeper.zookeepers():
        zookeeper_servers_string += '{}:{},'.format(zk_unit['host'],
                                                    zk_unit['port'])
    re_edit_in_place(
        '%s/files/nifi-1.3.0/conf/nifi.properties' % hookenv.charm_dir(), {
            r'.*nifi.cluster.is.node.*':
            'nifi.cluster.is.node=true',
            r'.*nifi.cluster.node.address.*':
            'nifi.cluster.node.address={}'.format(hookenv.unit_private_ip()),
            r'.*nifi.web.http.port.*':
            'nifi.web.http.port={}'.format(conf['nifi-port']),
            r'.*nifi.cluster.node.protocol.port.*':
            'nifi.cluster.node.protocol.port={}'.format(conf['cluster-port']),
            r'.*nifi.zookeeper.connect.string.*':
            'nifi.zookeeper.connect.string={}'.format(zookeeper_servers_string)
        })
    hookenv.open_port(conf['cluster-port'])
    filesdir = '{}/files'.format(hookenv.charm_dir())
    try:
        subprocess.check_call(
            ['bash', '{}/nifi-1.3.0/bin/nifi.sh'.format(filesdir), 'restart'])
        hookenv.status_set('active', 'Running: cluster mode with Zookeeper')
        set_state('apache-nifi.cluster')
    except subprocess.CalledProcessError:
        hookenv.status_set('blocked', 'Failed to restart')
Example #11
    def update_apps(self):
        # Add all services disabled unless we have a joined relation
        # as marked by the respective state
        # Enabled by default: 'filebrowser', 'jobbrowser'
        disabled_services = [
            'beeswax', 'impala', 'security', 'rdbms', 'jobsub', 'pig', 'hbase',
            'sqoop', 'zookeeper', 'metastore', 'spark', 'oozie', 'indexer',
            'search'
        ]

        for k, v in get_states().items():
            if "joined" in k:
                relname = k.split('.')[0]
                if 'hive' in relname:
                    disabled_services.remove('beeswax')
                    disabled_services.remove('metastore')
                if 'spark' in relname:
                    disabled_services.remove('spark')
                if 'oozie' in relname:
                    disabled_services.remove('oozie')
                if 'zookeeper' in relname:
                    disabled_services.remove('zookeeper')

        hue_config = ''.join(
            (self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
        services_string = ','.join(disabled_services)
        hookenv.log("Disabled apps {}".format(services_string))
        utils.re_edit_in_place(
            hue_config, {
                r'.*app_blacklist=.*': ''.join(
                    ('app_blacklist=', services_string))
            })

        self.check_relations()
Example #12
    def setup_gobblin(self, host, port):
        '''
        Configure Gobblin. Each time something changes (e.g. a new Hadoop
        endpoint is present), this method must be called.

        :param str host: IP or hostname of the HDFS endpoint.
        :param str port: Port of the HDFS endpoint.
        '''

        # Setup the environment
        gobblin_bin = self.dist_config.path('gobblin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if gobblin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], gobblin_bin])
            env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
            env['GOBBLIN_WORK_DIR'] = "/user/gobblin/work"

        hdfs_endpoint = ''.join([host, ':', port])

        # Setup gobblin configuration
        conf_dir = self.dist_config.path('gobblin') / 'conf'
        gobblin_config_template = conf_dir / 'gobblin-mapreduce.properties.template'
        gobblin_config = conf_dir / 'gobblin-mapreduce.properties'
        copy(gobblin_config_template, gobblin_config)

        utils.re_edit_in_place(gobblin_config, {
            r'fs.uri=hdfs://localhost:8020':
            'fs.uri=hdfs://%s' % hdfs_endpoint,
        })
Example #13
    def configure_kafka(self, zk_units):
        # Get ip:port data from our connected zookeepers
        if not zk_units:
            # if we have no zookeepers, make sure kafka is stopped
            self.stop()
        else:
            zks = []
            for remote_address, port in zk_units:
                ip = utils.resolve_private_address(remote_address)
                zks.append("%s:%s" % (ip, port))
            zks.sort()
            zk_connect = ",".join(zks)

            # update consumer props
            cfg = self.dist_config.path('kafka_conf') / 'consumer.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })

            # update server props
            cfg = self.dist_config.path('kafka_conf') / 'server.properties'
            utils.re_edit_in_place(cfg, {
                r'^zookeeper.connect=.*':
                'zookeeper.connect=%s' % zk_connect,
            })
Example #14
def is_localdomain():
    """
    Determine if our domainname is 'localdomain'.

    This method is useful for determining if a machine's domainname is
    'localdomain' so we can configure applications accordingly.

    :return: True if domainname is 'localdomain'; False otherwise
    """
    # NB: lxd has a pesky bug where it makes all containers think they
    # are .localdomain when they are really .lxd:
    #   https://bugs.launchpad.net/juju/+bug/1633126
    # The .lxd domain is completely valid for lxc FQDNs, so if we are
    # in this scenario, update nsswitch.conf to prefer the accurate lxd dns
    # over /etc/hosts. All subsequent domainname tests by facter or any
    # other application will correctly report .lxd as the domainname.
    lxd_check = subprocess.check_output(['hostname', '-A']).strip().decode()
    if lxd_check.endswith('.lxd'):
        utils.re_edit_in_place('/etc/nsswitch.conf', {
            r'files dns': 'dns files'
        })

    domainname = subprocess.check_output(['facter', 'domain']).strip().decode()
    if domainname == 'localdomain':
        return True
    else:
        return False
Example #15
    def configure_remote_db(self, mysql):
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        jdbc_url = \
            "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
                mysql.host(), mysql.port(), mysql.database()
            )
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = jdbc_url
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = \
                "com.mysql.jdbc.Driver"

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(
            hive_env, {
                r'.*export HIVE_AUX_JARS_PATH *=.*':
                ('export HIVE_AUX_JARS_PATH='
                 '/usr/share/java/mysql-connector-java.jar'),
            })

        # Now that we have db connection info, init our schema (only once)
        remote_db = hookenv.remote_service_name()
        if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
            tool_path = "{}/bin/schematool".format(
                self.dist_config.path('hive'))
            utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType',
                         'mysql')
            unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
            unitdata.kv().flush(True)
Example #16
    def setup_gobblin(self, host, port):
        """
        Configure Gobblin. Each time something changes (e.g. a new Hadoop
        endpoint is present), this method must be called.

        :param str host: IP or hostname of the HDFS endpoint.
        :param str port: Port of the HDFS endpoint.
        """

        # Setup the environment
        gobblin_bin = self.dist_config.path("gobblin") / "bin"
        with utils.environment_edit_in_place("/etc/environment") as env:
            if gobblin_bin not in env["PATH"]:
                env["PATH"] = ":".join([env["PATH"], gobblin_bin])
            env["HADOOP_BIN_DIR"] = env["HADOOP_HOME"] + "/bin"
            env["GOBBLIN_WORK_DIR"] = "/user/gobblin/work"

        hdfs_endpoint = "".join([host, ":", port])

        # Setup gobblin configuration
        conf_dir = self.dist_config.path("gobblin") / "conf"
        gobblin_config_template = conf_dir / "gobblin-mapreduce.properties.template"
        gobblin_config = conf_dir / "gobblin-mapreduce.properties"
        copy(gobblin_config_template, gobblin_config)

        utils.re_edit_in_place(gobblin_config, {r"fs.uri=hdfs://localhost:8020": "fs.uri=hdfs://%s" % hdfs_endpoint})
Example #17
    def configure_hadoop(self):
        java_home = Path(unitdata.kv().get('java.home'))
        java_bin = java_home / 'bin'
        hadoop_bin = self.dist_config.path('hadoop') / 'bin'
        hadoop_sbin = self.dist_config.path('hadoop') / 'sbin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['JAVA_HOME'] = java_home
            if java_bin not in env['PATH']:
                env['PATH'] = ':'.join([java_bin, env['PATH']])  # ensure that correct java is used
            if hadoop_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hadoop_bin])
            if hadoop_sbin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hadoop_sbin])
            env['HADOOP_LIBEXEC_DIR'] = self.dist_config.path('hadoop') / 'libexec'
            env['HADOOP_INSTALL'] = self.dist_config.path('hadoop')
            env['HADOOP_HOME'] = self.dist_config.path('hadoop')
            env['HADOOP_COMMON_HOME'] = self.dist_config.path('hadoop')
            env['HADOOP_HDFS_HOME'] = self.dist_config.path('hadoop')
            env['HADOOP_MAPRED_HOME'] = self.dist_config.path('hadoop')
            env['HADOOP_YARN_HOME'] = self.dist_config.path('hadoop')
            env['YARN_HOME'] = self.dist_config.path('hadoop')
            env['HADOOP_CONF_DIR'] = self.dist_config.path('hadoop_conf')
            env['YARN_CONF_DIR'] = self.dist_config.path('hadoop_conf')
            env['YARN_LOG_DIR'] = self.dist_config.path('yarn_log_dir')
            env['HDFS_LOG_DIR'] = self.dist_config.path('hdfs_log_dir')
            env['HADOOP_LOG_DIR'] = self.dist_config.path('hdfs_log_dir')  # for hadoop 2.2.0 only
            env['MAPRED_LOG_DIR'] = '/var/log/hadoop/mapred'  # should be moved to config, but could
            env['MAPRED_PID_DIR'] = '/var/run/hadoop/mapred'  # be destructive for mapreduce operation

        hadoop_env = self.dist_config.path('hadoop_conf') / 'hadoop-env.sh'
        utils.re_edit_in_place(hadoop_env, {
            r'export JAVA_HOME *=.*': 'export JAVA_HOME=%s' % java_home,
        })
Example #18
def initial_config():
    utils.re_edit_in_place('/etc/neo4j/neo4j.conf', {
        r'^#?dbms.connectors.default_listen_address=([0-9].[0-9].[0-9].[0-9]|)$':
            'dbms.connectors.default_listen_address=0.0.0.0',
        r'^#?dbms.security.auth_enabled=false$':
            'dbms.security.auth_enabled=false'
    })
Example #19
def is_localdomain():
    """
    Determine if our domainname is 'localdomain'.

    This method is useful for determining if a machine's domainname is
    'localdomain' so we can configure applications accordingly.

    :return: True if domainname is 'localdomain'; False otherwise
    """
    # NB: lxd has a pesky bug where it makes all containers think they
    # are .localdomain when they are really .lxd:
    #   https://bugs.launchpad.net/juju/+bug/1633126
    # The .lxd domain is completely valid for lxc FQDNs, so if we are
    # in this scenario, update nsswitch.conf to prefer the accurate lxd dns
    # over /etc/hosts. All subsequent domainname tests by facter or any
    # other application will correctly report .lxd as the domainname.
    lxd_check = subprocess.check_output(['hostname', '-A']).strip().decode()
    if lxd_check.endswith('.lxd'):
        utils.re_edit_in_place('/etc/nsswitch.conf', {
            r'files dns': 'dns files'
        })

    domainname = subprocess.check_output(['facter', 'domain']).strip().decode()
    if domainname == 'localdomain':
        return True
    else:
        return False
Example #20
 def disable_ha(self):
     spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
     utils.re_edit_in_place(spark_env, {
         r'.*SPARK_DAEMON_JAVA_OPTS.*': '# SPARK_DAEMON_JAVA_OPTS',
     })
     unitdata.kv().set('zookeepers.available', False)
     unitdata.kv().flush(True)
Example #21
    def configure_hive(self, mysql):
        config = hookenv.config()
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props[
                'javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
                    mysql.host(), mysql.port(), mysql.database())
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props[
                'javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
            props[
                'hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[
                    self.cpu_arch]

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(
            hive_env, {
                r'.*export HADOOP_HEAPSIZE *=.*':
                'export HADOOP_HEAPSIZE=%s' % config['heap'],
                r'.*export HIVE_AUX_JARS_PATH *=.*':
                'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
            })

        # Now that we have db connection info, init our schema (only once)
        if not unitdata.kv().get('hive.schema.initialized'):
            utils.run_as('hive', 'schematool', '-initSchema', '-dbType',
                         'mysql')
            unitdata.kv().set('hive.schema.initialized', True)
Example #22
    def setup_zookeeper_config(self):
        """Setup Zookeeper configuration based on default config.

        Copy the default configuration files to zookeeper_conf property
        defined in dist.yaml
        """
        default_conf = self.dist_config.path('zookeeper') / 'conf'
        zookeeper_conf = self.dist_config.path('zookeeper_conf')
        zookeeper_conf.rmtree_p()
        default_conf.copytree(zookeeper_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        zookeeper_conf.symlink(default_conf)

        zoo_cfg = zookeeper_conf / 'zoo.cfg'
        if not zoo_cfg.exists():
            (zookeeper_conf / 'zoo_sample.cfg').copy(zoo_cfg)
        utils.re_edit_in_place(zoo_cfg, {
            r'^dataDir.*': 'dataDir={}'.format(self.dist_config.path('zookeeper_data_dir')),
        })

        # Configure zookeeper environment for all users
        zookeeper_bin = self.dist_config.path('zookeeper') / 'bin'
        zookeeper_rest = self.dist_config.path('zookeeper') / 'src/contrib/rest'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zookeeper_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zookeeper_bin])
            env['ZOOCFGDIR'] = self.dist_config.path('zookeeper_conf')
            env['ZOO_BIN_DIR'] = zookeeper_bin
            env['ZOO_LOG_DIR'] = self.dist_config.path('zookeeper_log_dir')
            env['ZOO_REST_DIR'] = zookeeper_rest
Example #23
 def configure_hive(self, hostname, port):
     hookenv.log("configuring hive connection")
     hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
     utils.re_edit_in_place(hue_config, {
         r'.*hive_server_host *=.*': 'hive_server_host=%s' % hostname,
         r'.*hive_server_port *=.*': 'hive_server_port=%s' % port
         })
Example #24
    def setup_zookeeper_config(self):
        """
        Setup Zookeeper configuration based on default config.

        Copy the default configuration files to zookeeper_conf property
        defined in dist.yaml
        """
        default_conf = self.dist_config.path('zookeeper') / 'conf'
        zookeeper_conf = self.dist_config.path('zookeeper_conf')
        zookeeper_conf.rmtree_p()
        default_conf.copytree(zookeeper_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        zookeeper_conf.symlink(default_conf)

        zoo_cfg = zookeeper_conf / 'zoo.cfg'
        if not zoo_cfg.exists():
            (zookeeper_conf / 'zoo_sample.cfg').copy(zoo_cfg)
        utils.re_edit_in_place(zoo_cfg, {
            r'^dataDir.*': 'dataDir={}'.format(self.dist_config.path('zookeeper_data_dir')),
        })

        # Configure zookeeper environment for all users
        zookeeper_bin = self.dist_config.path('zookeeper') / 'bin'
        zookeeper_rest = self.dist_config.path('zookeeper') / 'src/contrib/rest'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zookeeper_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zookeeper_bin])
            env['ZOOCFGDIR'] = self.dist_config.path('zookeeper_conf')
            env['ZOO_BIN_DIR'] = zookeeper_bin
            env['ZOO_LOG_DIR'] = self.dist_config.path('zookeeper_log_dir')
            env['ZOO_REST'] = zookeeper_rest
Example #25
    def setup_flume_config(self):
        '''
        copy the default configuration files to flume_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('flume') / 'conf'
        flume_conf = self.dist_config.path('flume_conf')
        flume_conf.rmtree_p()
        default_conf.copytree(flume_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        flume_conf.symlink(default_conf)

        flume_env = flume_conf / 'flume-env.sh'
        if not flume_env.exists():
            (flume_conf / 'flume-env.sh.template').copy(flume_env)

        flume_conf_src = flume_conf / 'flume-conf.properties.template'
        flume_conf_dst = flume_conf / 'flume.conf'
        if not flume_conf_dst.exists():
            flume_conf_src.copy(flume_conf_dst)

        flume_log4j = self.dist_config.path('flume_conf') / 'log4j.properties'
        flume_logs = self.dist_config.path('flume_logs')
        utils.re_edit_in_place(flume_log4j, {
            r'^flume.log.dir.*': 'flume.log.dir={}'.format(flume_logs),
        })
Example #26
    def setup_flume_config(self):
        '''
        copy the default configuration files to flume_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('flume') / 'conf'
        flume_conf = self.dist_config.path('flume_conf')
        flume_conf.rmtree_p()
        default_conf.copytree(flume_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        flume_conf.symlink(default_conf)

        flume_env = self.dist_config.path('flume_conf') / 'flume-env.sh'
        if not flume_env.exists():
            (self.dist_config.path('flume_conf') /
             'flume-env.sh.template').copy(flume_env)

        flume_conf = self.dist_config.path('flume_conf') / 'flume.conf'
        if not flume_conf.exists():
            (self.dist_config.path('flume_conf') /
             'flume-conf.properties.template').copy(flume_conf)

        flume_log4j = self.dist_config.path('flume_conf') / 'log4j.properties'
        utils.re_edit_in_place(
            flume_log4j, {
                r'^flume.log.dir.*':
                'flume.log.dir={}'.format(self.dist_config.path('flume_logs')),
            })
Example #27
    def install(self):
        '''
        Perform initial one-time setup, workaround upstream bugs, and
        trigger puppet.
        '''
        # Dirs are handled by the bigtop deb, so no need to call out to
        # dist_config to do that work.  However, we want to adjust the
        # groups for the `ubuntu` user for better interaction with Juju.
        self.dist_config.add_users()

        # Set ports based on layer.yaml options
        self._add_override('zeppelin::server::server_port',
                           self.dist_config.port('zeppelin'))
        self._add_override('zeppelin::server::web_socket_port',
                           self.dist_config.port('zeppelin_websocket'))

        # Default spark to local mode on initial install. This will be
        # reconfigured if/when hadoop or spark relations are made.
        local_master = 'local[*]'
        self._add_override('zeppelin::server::spark_master_url', local_master)

        # The spark-client role expects hdfs by default. Since we want to
        # keep Hadoop optional, ensure we remove hadoopy bits from our
        # local spark config. This has no effect if/when a remote spark joins,
        # and since there is no spark history server running, the event dirs
        # are not important -- they just need not be 'hdfs:///blah'.
        events_log_dir = 'file:///tmp'
        self._add_override('spark::common::master_url', local_master)
        self._add_override('spark::common::event_log_dir', events_log_dir)
        self._add_override('spark::common::history_log_dir', events_log_dir)

        ##########
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf',
                     zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ##########

        self.trigger_bigtop()

        ##########
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
Example #28
    def install(self):
        '''
        Perform initial one-time setup, workaround upstream bugs, and
        trigger puppet.
        '''
        # Dirs are handled by the bigtop deb, so no need to call out to
        # dist_config to do that work.  However, we want to adjust the
        # groups for the `ubuntu` user for better interaction with Juju.
        self.dist_config.add_users()

        # Set ports based on layer.yaml options
        self._add_override('zeppelin::server::server_port',
                           self.dist_config.port('zeppelin'))
        self._add_override('zeppelin::server::web_socket_port',
                           self.dist_config.port('zeppelin_websocket'))

        # Default spark to local mode on initial install. This will be
        # reconfigured if/when hadoop or spark relations are made.
        local_master = 'local[*]'
        self._add_override('zeppelin::server::spark_master_url', local_master)

        # The spark-client role expects hdfs by default. Since we want to
        # keep Hadoop optional, ensure we remove hadoopy bits from our
        # local spark config. This has no effect if/when a remote spark joins,
        # and since there is no spark history server running, the event dirs
        # are not important -- they just need not be 'hdfs:///blah'.
        events_log_dir = 'file:///tmp'
        self._add_override('spark::common::master_url', local_master)
        self._add_override('spark::common::event_log_dir', events_log_dir)
        self._add_override('spark::common::history_log_dir', events_log_dir)

        ##########
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf', zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ##########

        self.trigger_bigtop()

        ##########
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(
            zepp_init_script, {
                r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
            })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
Example #29
 def disable_ha(self):
     spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
     utils.re_edit_in_place(
         spark_env, {
             r'.*SPARK_DAEMON_JAVA_OPTS.*': '# SPARK_DAEMON_JAVA_OPTS',
         })
     unitdata.kv().set('zookeepers.available', False)
     unitdata.kv().flush(True)
Example #30
def install_java():
    java_package = "openjdk-8-jdk-headless"
    fetch.apt_install(java_package)
    java_home_ = java_home()
    utils.re_edit_in_place(
        '/etc/environment', {
            r'#? *JAVA_HOME *=.*': 'JAVA_HOME={}'.format(java_home_),
        },
        append_non_matches=True)
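
The append_non_matches=True flag above means a JAVA_HOME line is appended when no existing (possibly commented) line matches. A hypothetical demo using the re_edit_in_place sketch shown after Example #1, on a throwaway file (file name and contents invented for illustration):

import tempfile

# Throwaway stand-in for /etc/environment with no JAVA_HOME entry.
with tempfile.NamedTemporaryFile('w', suffix='.env', delete=False) as f:
    f.write('PATH=/usr/bin\n')
    demo_path = f.name

# Nothing matches the pattern, so the replacement line is appended.
re_edit_in_place(demo_path, {
    r'#? *JAVA_HOME *=.*': 'JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64',
}, append_non_matches=True)

print(open(demo_path).read())
# PATH=/usr/bin
# JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64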
Example #31
    def setup_hue(self, namenodes, resourcemanagers, hdfs_port, yarn_port, yarn_http, yarn_ipc):
        hookenv.status_set('maintenance', 'Setting up Hue')
        hue_bin = self.dist_config.path('hue') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hue_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hue_bin])
            env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
            env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
            hadoop_conf = env['HADOOP_CONF_DIR'] + '/core-site.xml'
            yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
            mapred_conf = env['HADOOP_CONF_DIR'] + '/mapred-site.xml'

        with utils.xmlpropmap_edit_in_place(hadoop_conf) as props:
            hdfs_endpoint = props['fs.defaultFS']

        with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
            yarn_log_url = props['yarn.log.server.url'] # 19888
            yarn_resmgr = props['yarn.resourcemanager.address'] # 8032

        with utils.xmlpropmap_edit_in_place(mapred_conf) as props:
            mapred_jobhistory = props['mapreduce.jobhistory.address'] # 10020

        default_conf = self.dist_config.path('hue') / 'desktop/conf'
        hue_conf = self.dist_config.path('hue_conf')

        if os.path.islink('/usr/lib/hue/desktop/conf'):
            return
        else:
            hue_conf.rmtree_p()
            default_conf.copytree(hue_conf)
            # Now remove the conf included in the tarball and symlink our real conf
            default_conf.rmtree_p()
            hue_conf.symlink(default_conf)
        
        hdfs_fulluri = hdfs_endpoint.split('/')[2]
        hdfs_hostname = hdfs_fulluri.split(':')[0]

        hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
        hue_port = self.dist_config.port('hue_web')

        # Fix following for HA: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_hadoop-ha/content/ha-nn-deploy-hue.html
        hookenv.log("Not currently supporting HA, FIX: namenodes are: " + str(namenodes) + " resmanagers: " + str(resourcemanagers))
        utils.re_edit_in_place(hue_config, {
            r'http_port=8888': 'http_port=%s' % hue_port,
            #r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s' % hdfs_endpoint,
            r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s:%s' % (namenodes[0], hdfs_port),
            #r'## resourcemanager_host=localhost': 'resourcemanager_host=%s' % yarn_resmgr.split(':')[0],
            r'.*resourcemanager_host=localhost': 'resourcemanager_host=%s' % resourcemanagers[0],
            #r'## resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_resmgr.split(':')[1],
            r'.*resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_port,
            r'.*webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://%s:50070/webhdfs/v1' % namenodes[0],
            r'.*history_server_api_url=http://localhost:19888': 'history_server_api_url=%s' % yarn_log_url.split('/')[0],
            r'.*resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://%s:8088' % yarn_resmgr.split(':')[0],
            r'.*secret_key=.*': 'secret_key=%s' % uuid.uuid4()
            })

        self.update_apps()
Example #32
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        bigtop.render_site_yaml(
            roles=[
                'spark-client',
                'spark-yarn-slave',
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        if unitdata.kv().get('zeppelin.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            ####################################################################
            # BUG: BIGTOP-2742
            # Default zeppelin init script looks for the literal '$(hostname)'
            # string. Symlink it so it exists before the apt install from puppet
            # tries to start the service.
            import subprocess
            host = subprocess.check_output(['hostname']).decode('utf8').strip()
            zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
            utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
            utils.run_as('root', 'ln', '-sf',
                         zepp_pid,
                         '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
            ####################################################################

            bigtop.trigger_puppet()
            self.wait_for_api(30)

            ####################################################################
            # BUG: BIGTOP-2742
            # Puppet apply will call systemctl daemon-reload, which removes the
            # symlink we just created. Now that the bits are on disk, update the
            # init script $(hostname) that caused this mess to begin with.
            zepp_init_script = '/etc/init.d/zeppelin'
            utils.re_edit_in_place(zepp_init_script, {
                r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
            })
            utils.run_as('root', 'systemctl', 'daemon-reload')
            self.restart()
            self.wait_for_api(30)
Example #33
 def configure_spark(self, hostname, port):
     #hookenv.log("configuring spark connection via livy")
     hue_config = ''.join(
         (self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
     utils.re_edit_in_place(
         hue_config, {
             r'.*livy_server_host *=.*': 'livy_server_host=%s' % hostname,
             r'.*livy_server_port *=.*': 'livy_server_port=%s' % port
         })
Example #34
 def configure_hive(self, hostname, port):
     hookenv.log("configuring hive connection")
     hue_config = ''.join(
         (self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
     utils.re_edit_in_place(
         hue_config, {
             r'.*hive_server_host *=.*': 'hive_server_host=%s' % hostname,
             r'.*hive_server_port *=.*': 'hive_server_port=%s' % port
         })
Example #35
 def configure_hive(self):
     '''
     Called during config-changed events
     '''
     config = hookenv.config()
     hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
     utils.re_edit_in_place(hive_env, {
         r'.*export HADOOP_HEAPSIZE *=.*': 'export HADOOP_HEAPSIZE=%s' % config['heap'],
     })
Example #36
    def install(self):
        '''
        Perform initial one-time setup, workaround upstream bugs, and
        trigger puppet.
        '''
        # Dirs are handled by the bigtop deb, so no need to call out to
        # dist_config to do that work.  However, we want to adjust the
        # groups for the `ubuntu` user for better interaction with Juju.
        self.dist_config.add_users()

        # Set ports based on layer.yaml options
        self._add_override('zeppelin::server::server_port',
                           self.dist_config.port('zeppelin'))
        self._add_override('zeppelin::server::web_socket_port',
                           self.dist_config.port('zeppelin_web'))

        # Default spark to local mode on initial install. This will be
        # reconfigured if/when hadoop or spark relations are made.
        self._add_override('zeppelin::server::spark_master_url', 'local[*]')

        ##########
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf',
                     zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ##########

        self.trigger_bigtop()

        ##########
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
        ##########

        ##########
        # BUG: BIGTOP-2154
        # The zep deb depends on spark-core and spark-python. However, because
        # of the unholy requirement to have hive tightly coupled to spark,
        # we need to ensure spark-datanucleus is installed. Do this after the
        # initial install so the bigtop repo is available to us.
        utils.run_as('root', 'apt-get', 'install', '-qy', 'spark-datanucleus')
Example #37
    def configure_yarn_mode(self):
        # put the spark jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
            self.dist_config.path('spark')))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                         '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            pass  # jar already in HDFS from another Spark

        with utils.environment_edit_in_place('/etc/environment') as env:
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # create hdfs storage space for history server
        dc = self.dist_config
        prefix = dc.path('log_prefix')
        events_dir = dc.path('spark_events')
        events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     events_dir)

        # create hdfs storage space for spark-bench
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/spark-bench')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     '/user/ubuntu/spark-bench')

        # ensure user-provided Hadoop works
        hadoop_classpath = utils.run_as('hdfs',
                                        'hadoop',
                                        'classpath',
                                        capture_output=True)
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DIST_CLASSPATH.*':
            'SPARK_DIST_CLASSPATH={}'.format(hadoop_classpath),
        },
                               append_non_matches=True)

        # update spark-defaults
        spark_conf = self.dist_config.path(
            'spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*':
            'spark.master {}'.format(self.get_master()),
        },
                               append_non_matches=True)

        unitdata.kv().set('hdfs.available', True)
        unitdata.kv().flush(True)
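
configure_yarn_mode above shells out to HDFS through a run_as helper. A minimal sketch under the assumption that it wraps the command in sudo -u <user> and optionally captures output; the real charm utility also handles environment injection and other details this sketch omits.

import subprocess

def run_as(user, *command, **kwargs):
    """Illustrative sketch of running a command as another user."""
    capture_output = kwargs.pop('capture_output', False)
    cmd = ['sudo', '-H', '-u', user] + [str(c) for c in command]
    if capture_output:
        # Return stripped stdout, since callers above expect a string.
        return subprocess.check_output(cmd, universal_newlines=True).strip()
    subprocess.check_call(cmd)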
Example #38
 def configure_hive(self):
     '''
     Called during config-changed events
     '''
     config = hookenv.config()
     hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
     utils.re_edit_in_place(
         hive_env, {
             r'.*export HADOOP_HEAPSIZE *=.*':
             'export HADOOP_HEAPSIZE=%s' % config['heap'],
         })
Example #39
    def set_advertise(self):
        short_host = check_output(['hostname', '-s']).decode('utf8').strip()

        # Configure server.properties
        # NB: We set the advertised.host.name below to our short hostname
        # to kafka (admin will still have to expose kafka and ensure the
        # external client can resolve the short hostname to our public ip).
        kafka_server_conf = '/etc/kafka/conf/server.properties'
        utils.re_edit_in_place(kafka_server_conf, {
            r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % short_host,
        })
Example #40
def zookeeper_removed():
    hookenv.status_set('maintenance', 'Removing Apache NiFi from cluster')
    re_edit_in_place('{}/files/nifi-1.1.1/conf/nifi.properties'.format(hookenv.charm_dir()), {
        r'.*nifi.cluster.is.node.*': 'nifi.cluster.is.node=false'
    })
    hookenv.close_port(hookenv.config()['cluster-port'])
    if service_restart('nifi'):
        remove_state('apache-nifi.cluster')
        hookenv.status_set('active', 'Running: standalone mode')
    else:
        hookenv.status_set('blocked', 'Failed to restart')
Example #41
def config_bindings():
    try:
        subprocess.check_call(['service','neo4j','stop'])
    except subprocess.CalledProcessError as exception:
        hookenv.log(exception.output)
    utils.re_edit_in_place('/etc/neo4j/neo4j.conf', {
        r'#dbms.connector.http.address=0.0.0.0:7474': 'dbms.connector.http.address=0.0.0.0:7474',
    })
    service_start('neo4j')
    hookenv.status_set('active', 'Ready')
    set_state('neo4j.installed')
Example #42
def init_fw():
    # this value has to be changed to set ufw rules
    utils.re_edit_in_place('/etc/default/ufw', {
        r'IPV6=yes': 'IPV6=no',
    })
    if config('firewall_enabled'):
        sp.check_call(['ufw', 'allow', '22'])
        sp.check_output(['ufw', 'enable'], input='y\n',
                        universal_newlines=True)
    else:
        sp.check_output(['ufw', 'disable'])
Example #43
def init_fw():
    conf = config()
    utils.re_edit_in_place('/etc/default/ufw', {
        r'IPV6=yes': 'IPV6=no',
    })
    if conf['firewall-enabled']:
        subprocess.check_call(['ufw', 'allow', '22'])
        subprocess.check_output(['ufw', 'enable'],
                                input='y\n',
                                universal_newlines=True)
    else:
        subprocess.check_output(['ufw', 'disable'])
Example #44
 def setup_puppet_config(self, NN, RM):
     # generate site.yaml. Something like this would do
     hiera_dst = self.options.get('bigtop_hiera_path')
     hiera_conf = self.options.get('bigtop_hiera_config')
     hiera_site_yaml = self.options.get('bigtop_hiera_siteyaml')
     bigtop_site_yaml = "{0}/{1}/{2}".format(self.bigtop_dir, self.bigtop_version, hiera_site_yaml)
     self.prepare_bigtop_config(bigtop_site_yaml, NN, RM)
     # Now copy hiera.yaml to /etc/puppet & point hiera to use the above location as hieradata directory
     Path("{0}/{1}/{2}".format(self.bigtop_dir, self.bigtop_version, hiera_conf)).copy(hiera_dst)
     utils.re_edit_in_place(hiera_dst, {
         r'.*:datadir.*': "  :datadir: {0}/".format(os.path.dirname(bigtop_site_yaml)),
     })
Example #45
    def setup_kafka_config(self):
        '''
        copy the default configuration files to kafka_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        # dir. we've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Configure immutable bits
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(
            hookenv.unit_get('private-address'))
        kafka_server_conf = self.dist_config.path(
            'kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(
            kafka_server_conf, {
                r'^broker.id=.*':
                'broker.id=%s' % unit_num,
                r'^port=.*':
                'port=%s' % self.dist_config.port('kafka'),
                r'^log.dirs=.*':
                'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
                r'^#?advertised.host.name=.*':
                'advertised.host.name=%s' % public_address,
            })

        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(
            kafka_log4j, {
                r'^kafka.logs.dir=.*':
                'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
            })

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_kv_host(private_ip, public_address)
        utils.manage_etc_hosts()
Example #46
    def setup_kafka_config(self):
        '''
        copy the default configuration files to kafka_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('kafka') / 'config'
        kafka_conf = self.dist_config.path('kafka_conf')
        kafka_conf.rmtree_p()
        default_conf.copytree(kafka_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        # dir. we've seen issues where kafka still looks for config in
        # KAFKA_HOME/config.
        default_conf.rmtree_p()
        kafka_conf.symlink(default_conf)

        # Configure immutable bits
        kafka_bin = self.dist_config.path('kafka') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if kafka_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], kafka_bin])
            env['LOG_DIR'] = self.dist_config.path('kafka_app_logs')

        # note: we set the advertised.host.name below to the public_address
        # to ensure that external (non-Juju) clients can connect to Kafka
        public_address = hookenv.unit_get('public-address')
        private_ip = utils.resolve_private_address(hookenv.unit_get('private-address'))
        kafka_server_conf = self.dist_config.path('kafka_conf') / 'server.properties'
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        utils.re_edit_in_place(kafka_server_conf, {
            r'^broker.id=.*': 'broker.id=%s' % unit_num,
            r'^port=.*': 'port=%s' % self.dist_config.port('kafka'),
            r'^log.dirs=.*': 'log.dirs=%s' % self.dist_config.path('kafka_data_logs'),
            r'^#?advertised.host.name=.*': 'advertised.host.name=%s' % public_address,
        })

        kafka_log4j = self.dist_config.path('kafka_conf') / 'log4j.properties'
        utils.re_edit_in_place(kafka_log4j, {
            r'^kafka.logs.dir=.*': 'kafka.logs.dir=%s' % self.dist_config.path('kafka_app_logs'),
        })

        # fix for lxc containers and some corner cases in manual provider
        # ensure that public_address is resolvable internally by mapping it to the private IP
        utils.update_etc_hosts({private_ip: public_address})

        templating.render(
            'upstart.conf',
            '/etc/init/kafka.conf',
            context={
                'kafka_conf': self.dist_config.path('kafka_conf'),
                'kafka_bin': '{}/bin'.format(self.dist_config.path('kafka'))
            },
        )
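
Both Kafka examples edit /etc/environment through environment_edit_in_place(), whose definition is not part of this listing. A minimal sketch of how such a context manager could work, assuming the file holds simple KEY="value" lines (this is an illustration, not the real helper), might be:

from contextlib import contextmanager


@contextmanager
def environment_edit_in_place(path='/etc/environment'):
    # Read KEY="value" pairs into a dict, hand the dict to the caller for
    # mutation, then rewrite the file from the (possibly modified) dict.
    env = {}
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, _, value = line.partition('=')
            env[key.strip()] = value.strip().strip('"')
    yield env
    with open(path, 'w') as fh:
        for key, value in env.items():
            fh.write('{}="{}"\n'.format(key, value))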
Example #47
def config_bindings():
    try:
        subprocess.check_call(['service', 'neo4j', 'stop'])
    except subprocess.CalledProcessError as exception:
        hookenv.log(exception.output)
    utils.re_edit_in_place(
        '/etc/neo4j/neo4j.conf', {
            r'#dbms.connector.http.address=0.0.0.0:7474':
            'dbms.connector.http.address=0.0.0.0:7474',
        })
    service_start('neo4j')
    hookenv.status_set('active', 'Ready')
    set_state('neo4j.installed')
Example #48
    def set_advertise(self):
        short_host = check_output(['hostname', '-s']).decode('utf8').strip()

        # Configure server.properties
        # NB: We set the advertised.host.name below to our short hostname
        # to kafka (admin will still have to expose kafka and ensure the
        # external client can resolve the short hostname to our public ip).
        kafka_server_conf = '/etc/kafka/conf/server.properties'
        utils.re_edit_in_place(
            kafka_server_conf, {
                r'^#?advertised.host.name=.*':
                'advertised.host.name=%s' % short_host,
            })
Example #49
    def configure_notebook(self):
        # profile config created during install
        ipython_profile = "ipython_notebook_config.py"
        # find path to ipython_notebook_config.py
        pPath = "/home/ubuntu/.ipython/profile_pyspark"
        cmd = ['find', pPath, '-name', ipython_profile]
        profile_config = check_output(cmd, universal_newlines=True).strip()

        # update profile with standard opts and configured port
        port = self.dist_config.port('notebook')
        notebooks_dir = self.dist_config.path('notebooks')
        utils.re_edit_in_place(profile_config, {
            r'.*c.NotebookApp.ip *=.*':
            'c.NotebookApp.ip = "*"',

            r'.*c.NotebookApp.open_browser *=.*':
            'c.NotebookApp.open_browser = False',

            r'.*c.NotebookApp.port *=.*':
            'c.NotebookApp.port = {}'.format(port),

            r'.*c.NotebookManager.notebook_dir *=.*':
            "c.NotebookManager.notebook_dir = u'{}'".format(notebooks_dir),
        })

        spark_home = os.environ.get("SPARK_HOME", '/usr/lib/spark')
        py4j = "py4j-0.*.zip"
        cmd = "find {} -name {}".format(spark_home, py4j)
        # TODO: handle missing py4j
        py4j_path = check_output(cmd.split(), universal_newlines=True).strip()

        setup_source = 'scripts/00-pyspark-setup.py'
        Path(setup_source).chmod(0o755)
        Path(setup_source).chown('ubuntu', 'hadoop')
        utils.re_edit_in_place(setup_source, {
            r'py4j *=.*': 'py4j="{}"'.format(py4j_path),
        })
        home = Path(os.environ.get('HOME', '/home/ubuntu'))
        profile_dir = home / '.ipython/profile_pyspark'
        setup_target = profile_dir / 'startup/00-pyspark-setup.py'
        Path(setup_source).copy2(setup_target)

        # Our spark charm defaults to yarn-client, so that should
        # be a safe default here in case MASTER isn't set. Update the env
        # with our spark mode and py4j location.
        spark_mode = os.environ.get("MASTER", "yarn-client")
        spark_home = Path(os.environ.get("SPARK_HOME", "/usr/lib/spark"))
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['PYSPARK_DRIVER_PYTHON_OPTS'] = "notebook"
            env['PYSPARK_SUBMIT_ARGS'] = "--master " + spark_mode
            env['PYTHONPATH'] = spark_home / py4j_path
Example #50
 def configure(self, mode):
     livy_conf = self.dist_config.path('livy') / 'conf/livy-defaults.conf'
     if not livy_conf.exists():
         (self.dist_config.path('livy') / 'conf/livy-defaults.conf.template').copy(livy_conf)
     etc_conf = self.dist_config.path('livy_conf') / 'livy-defaults.conf'
     if not etc_conf.exists():
         livy_conf.symlink(etc_conf)
     if mode == 'yarn-client':
         spark_mode = 'yarn'
     else:
         spark_mode = 'process'
     utils.re_edit_in_place(livy_conf, {
         r'.*livy.server.session.factory =*.*': '  livy.server.session.factory = ' + spark_mode,
         })
Example #51
 def test_re_edit_in_place(self):
     fd, filename = tempfile.mkstemp()
     os.close(fd)
     tmp_file = Path(filename)
     try:
         tmp_file.write_text('foo\nbar\nqux')
         utils.re_edit_in_place(tmp_file, {
             r'oo$': 'OO',
             r'a': 'A',
             r'^qux$': 'QUX',
         })
         self.assertEqual(tmp_file.text(), 'fOO\nbAr\nQUX')
     finally:
         tmp_file.remove()
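
The test above pins down the behaviour of re_edit_in_place(): each regex is applied line by line and matching lines are rewritten in place. A minimal implementation sketch that satisfies this test and the append_non_matches=True calls seen elsewhere in these examples (an assumption for illustration, not the actual library code) could be:

import re


def re_edit_in_place(path, subs, append_non_matches=False):
    # Sketch: apply each pattern/replacement pair to every line; optionally
    # append the replacement text of patterns that never matched any line.
    with open(path) as fh:
        lines = fh.read().splitlines()
    matched = set()
    for i, line in enumerate(lines):
        for pattern, replacement in subs.items():
            if re.search(pattern, line):
                matched.add(pattern)
                line = re.sub(pattern, replacement, line)
        lines[i] = line
    if append_non_matches:
        lines.extend(repl for pat, repl in subs.items() if pat not in matched)
    with open(path, 'w') as fh:
        fh.write('\n'.join(lines))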
Example #52
    def configure_hadoop_libs(self):
        if unitdata.kv().get('hadoop.extra.installed', False):
            return

        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.driver.extraClassPath .*': 'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
            r'.*spark.jars .*': 'spark.jars {}'.format(hadoop_extra_classpath),
        }, append_non_matches=True)

        unitdata.kv().set('hadoop.extra.installed', True)
        unitdata.kv().flush(True)
Example #54
    def disable_yarn_mode(self):
        # put the spark jar in hdfs
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['SPARK_JAR'] = glob('{}/lib/spark-assembly-*.jar'.format(
                                    self.dist_config.path('spark')))[0]

        # update spark-defaults
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
        }, append_non_matches=True)

        unitdata.kv().set('hdfs.available', False)
        unitdata.kv().flush(True)
Example #55
    def configure_local_db(self):
        local_url = 'jdbc:derby:;databaseName=/var/lib/hive/metastore/metastore_db;create=true'
        local_driver = 'org.apache.derby.jdbc.EmbeddedDriver'
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = local_url
            props['javax.jdo.option.ConnectionUserName'] = '******'
            props['javax.jdo.option.ConnectionPassword'] = '******'
            props['javax.jdo.option.ConnectionDriverName'] = local_driver

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HIVE_AUX_JARS_PATH *=.*': '# export HIVE_AUX_JARS_PATH=',
        })
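
configure_local_db() relies on xmlpropmap_edit_in_place() to treat hive-site.xml as a name/value map. A rough standard-library sketch of such a context manager for Hadoop-style configuration XML (an assumption about the helper, not its real implementation) might look like:

import xml.etree.ElementTree as ET
from contextlib import contextmanager


@contextmanager
def xmlpropmap_edit_in_place(path):
    # Expose <property><name>..</name><value>..</value></property> entries as
    # a dict, then rebuild the <configuration> element from that dict on exit.
    tree = ET.parse(path)
    root = tree.getroot()
    props = {
        prop.findtext('name'): prop.findtext('value', default='')
        for prop in root.findall('property')
    }
    yield props
    root.clear()
    for name, value in props.items():
        prop = ET.SubElement(root, 'property')
        ET.SubElement(prop, 'name').text = name
        ET.SubElement(prop, 'value').text = str(value)
    tree.write(path)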
Example #56
def install():
    hookenv.log('Installing neo4j')
    config = hookenv.config()
    hookenv.open_port(config['port'])
    fetch.configure_sources(True)
    fetch.apt_install(fetch.filter_installed_packages(['neo4j']))
    utils.re_edit_in_place('/etc/neo4j/neo4j-server.properties', {
        r'#org.neo4j.server.webserver.address=0.0.0.0': 'org.neo4j.server.webserver.address=0.0.0.0',
    })
#    utils.re_edit_in_place('/etc/security/limits.conf', {
#        r'#org.neo4j.server.webserver.address=127.0.0.1': 'org.neo4j.server.webserver.address=0.0.0.0',
#    })
    service_restart('neo4j-service')
    set_state('neo4j.installed')
Example #57
def zookeeper_removed():
    filesdir = '{}/files'.format(hookenv.charm_dir())
    hookenv.status_set('maintenance', 'Removing Apache NiFi from cluster')
    re_edit_in_place(
        '{}/files/nifi-1.3.0/conf/nifi.properties'.format(hookenv.charm_dir()),
        {r'.*nifi.cluster.is.node.*': 'nifi.cluster.is.node=false'})
    hookenv.close_port(hookenv.config()['cluster-port'])
    try:
        subprocess.check_call(
            ['bash', '{}/nifi-1.3.0/bin/nifi.sh'.format(filesdir), 'restart'])
        hookenv.status_set('active', 'Running: standalone mode')
        set_state('apache-nifi.installed')
    except subprocess.CalledProcessError:
        hookenv.status_set('blocked', 'Failed to restart')
Example #58
    def configure_yarn_mode(self):
        # put the spark jar in hdfs
        spark_assembly_jar = glob('{}/lib/spark-assembly-*.jar'.format(
                                  self.dist_config.path('spark')))[0]
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/share/lib')
        try:
            utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                         '/user/ubuntu/share/lib/spark-assembly.jar')
        except CalledProcessError:
            pass  # jar already in HDFS from another Spark

        with utils.environment_edit_in_place('/etc/environment') as env:
            env['SPARK_JAR'] = "hdfs:///user/ubuntu/share/lib/spark-assembly.jar"

        # create hdfs storage space for history server
        dc = self.dist_config
        prefix = dc.path('log_prefix')
        events_dir = dc.path('spark_events')
        events_dir = 'hdfs:///{}'.format(events_dir.replace(prefix, ''))
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p', events_dir)
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     events_dir)

        # create hdfs storage space for spark-bench
        utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/ubuntu/spark-bench')
        utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                     '/user/ubuntu/spark-bench')

        # ensure user-provided Hadoop works
        hadoop_classpath = utils.run_as('hdfs', 'hadoop', 'classpath',
                                        capture_output=True)
        spark_env = self.dist_config.path('spark_conf') / 'spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DIST_CLASSPATH.*': 'SPARK_DIST_CLASSPATH={}'.format(hadoop_classpath),
        }, append_non_matches=True)

        # update spark-defaults
        spark_conf = self.dist_config.path('spark_conf') / 'spark-defaults.conf'
        etc_env = utils.read_etc_env()
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        utils.re_edit_in_place(spark_conf, {
            r'.*spark.master .*': 'spark.master {}'.format(self.get_master()),
            r'.*spark.driver.extraClassPath .*': 'spark.driver.extraClassPath {}'.format(hadoop_extra_classpath),
        }, append_non_matches=True)

        unitdata.kv().set('hdfs.available', True)
        unitdata.kv().flush(True)
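
run_as() appears throughout configure_yarn_mode() to execute HDFS commands as the hdfs user. A simple sketch matching the call pattern run_as(user, cmd, *args, capture_output=...) could look like the following; this is an assumption for illustration, since the real helper may use su and inject a specific environment:

import subprocess


def run_as(user, command, *args, **kwargs):
    # Run the command as another user via sudo; optionally capture stdout.
    argv = ['sudo', '-u', user, command] + [str(a) for a in args]
    if kwargs.get('capture_output'):
        return subprocess.check_output(argv, universal_newlines=True).strip()
    subprocess.check_call(argv)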
Example #59
    def update_bind_address(self):
        """
        Possibly update network interface bindings

        """
        network_interface = config().get('network_interface')

        if network_interface:
            network_interface = get_ip_for_interface(network_interface)
            zookeeper_cfg = "{}/zoo.cfg".format(
                self.dist_config.path('zookeeper_conf'))

            utils.re_edit_in_place(zookeeper_cfg, {
                r'^clientPortAddress.*': 'clientPortAddress={}'.format(
                    network_interface)}, append_non_matches=True)
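
update_bind_address() depends on get_ip_for_interface() to turn a configured interface name into an address for clientPortAddress. A Linux-only sketch using the SIOCGIFADDR ioctl is shown below as an assumption; the charm's actual helper may also accept other inputs such as a CIDR range:

import fcntl
import socket
import struct


def get_ip_for_interface(interface):
    # Ask the kernel (SIOCGIFADDR) for the IPv4 address bound to the interface.
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        packed = struct.pack('256s', interface[:15].encode('utf-8'))
        info = fcntl.ioctl(sock.fileno(), 0x8915, packed)  # SIOCGIFADDR
    finally:
        sock.close()
    return socket.inet_ntoa(info[20:24])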