Example #1
    def configure_hdfs_base(self, host, port):
        dc = self.hadoop_base.dist_config
        core_site = dc.path('hadoop_conf') / 'core-site.xml'
        with utils.xmlpropmap_edit_in_place(core_site) as props:
            if host and port:
                props['fs.defaultFS'] = "hdfs://{host}:{port}".format(host=host, port=port)
            props['hadoop.proxyuser.hue.hosts'] = "*"
            props['hadoop.proxyuser.hue.groups'] = "*"
            props['hadoop.proxyuser.oozie.groups'] = '*'
            props['hadoop.proxyuser.oozie.hosts'] = '*'
            if 'lzo' in self.hadoop_base.resources:
                props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                                  'org.apache.hadoop.io.compress.DefaultCodec, '
                                                  'org.apache.hadoop.io.compress.BZip2Codec, '
                                                  'org.apache.hadoop.io.compress.SnappyCodec, '
                                                  'com.hadoop.compression.lzo.LzoCodec, '
                                                  'com.hadoop.compression.lzo.LzopCodec')
                props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
            else:
                props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                                  'org.apache.hadoop.io.compress.DefaultCodec, '
                                                  'org.apache.hadoop.io.compress.BZip2Codec, '
                                                  'org.apache.hadoop.io.compress.SnappyCodec')

        hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
        with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
            props['dfs.webhdfs.enabled'] = "true"
            props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
            props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
            props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
 def configure_yarn_base(self, host, port, history_http, history_ipc):
     dc = self.hadoop_base.dist_config
     yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
     with utils.xmlpropmap_edit_in_place(yarn_site) as props:
         props['yarn.nodemanager.aux-services'] = 'mapreduce_shuffle'
         props['yarn.nodemanager.vmem-check-enabled'] = 'false'
         if host:
             props['yarn.resourcemanager.hostname'] = '{}'.format(host)
             props['yarn.resourcemanager.address'] = '{}:{}'.format(
                 host, port)
             props["yarn.log.server.url"] = "{}:{}/jobhistory/logs/".format(
                 host, history_http)
     mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
     with utils.xmlpropmap_edit_in_place(mapred_site) as props:
         if host and history_ipc:
             props["mapreduce.jobhistory.address"] = "{}:{}".format(
                 host, history_ipc)
         if host and history_http:
             props["mapreduce.jobhistory.webapp.address"] = "{}:{}".format(
                 host, history_http)
         props["mapreduce.framework.name"] = 'yarn'
          props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
          props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
          props["mapreduce.map.output.compress"] = 'true'
          props["mapred.map.output.compress.codec"] = 'org.apache.hadoop.io.compress.SnappyCodec'
          props["mapreduce.application.classpath"] = "$HADOOP_HOME/share/hadoop/mapreduce/*,\
Example #3
    def setup_hue(self, namenodes, resourcemanagers, hdfs_port, yarn_port, yarn_http, yarn_ipc):
        hookenv.status_set('maintenance', 'Setting up Hue')
        hue_bin = self.dist_config.path('hue') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hue_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hue_bin])
            env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
            env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
            hadoop_conf = env['HADOOP_CONF_DIR'] + '/core-site.xml'
            yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
            mapred_conf = env['HADOOP_CONF_DIR'] + '/mapred-site.xml'

        with utils.xmlpropmap_edit_in_place(hadoop_conf) as props:
            hdfs_endpoint = props['fs.defaultFS']

        with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
            yarn_log_url = props['yarn.log.server.url'] # 19888
            yarn_resmgr = props['yarn.resourcemanager.address'] # 8032

        with utils.xmlpropmap_edit_in_place(mapred_conf) as props:
            mapred_jobhistory = props['mapreduce.jobhistory.address'] # 10020

        default_conf = self.dist_config.path('hue') / 'desktop/conf'
        hue_conf = self.dist_config.path('hue_conf')

        if os.path.islink('/usr/lib/hue/desktop/conf'):
            return
        else:
            hue_conf.rmtree_p()
            default_conf.copytree(hue_conf)
            # Now remove the conf included in the tarball and symlink our real conf
            default_conf.rmtree_p()
            hue_conf.symlink(default_conf)

        hdfs_fulluri = hdfs_endpoint.split('/')[2]
        hdfs_hostname = hdfs_fulluri.split(':')[0]

        hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
        hue_port = self.dist_config.port('hue_web')

        # Fix following for HA: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_hadoop-ha/content/ha-nn-deploy-hue.html
        hookenv.log("Not currently supporting HA, FIX: namenodes are: " + str(namenodes) + " resmanagers: " + str(resourcemanagers))
        utils.re_edit_in_place(hue_config, {
            r'http_port=8888': 'http_port=%s' % hue_port,
            #r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s' % hdfs_endpoint,
            r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s:%s' % (namenodes[0], hdfs_port),
            #r'## resourcemanager_host=localhost': 'resourcemanager_host=%s' % yarn_resmgr.split(':')[0],
            r'.*resourcemanager_host=localhost': 'resourcemanager_host=%s' % resourcemanagers[0],
            #r'## resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_resmgr.split(':')[1],
            r'.*resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_port,
            r'.*webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://%s:50070/webhdfs/v1' % namenodes[0],
            r'.*history_server_api_url=http://localhost:19888': 'history_server_api_url=%s' % yarn_log_url.split('/')[0],
            r'.*resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://%s:8088' % yarn_resmgr.split(':')[0],
            r'.*secret_key=.*': 'secret_key=%s' % uuid.uuid4()
            })

        self.update_apps()
Example #4
 def configure_zookeeper(self, zookeepers):
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.ha.automatic-failover.enabled'] = 'true'
     core_site = dc.path('hadoop_conf') / 'core-site.xml'
     with utils.xmlpropmap_edit_in_place(core_site) as props:
         zk_str = ','.join('{host}:{port}'.format(**zk) for zk in zookeepers)
         hookenv.log("Zookeeper string is: %s" % zk_str)
         props['ha.zookeeper.quorum'] = zk_str
     self.hadoop_base.setup_init_script("hdfs", "zkfc")
 def configure_zookeeper(self, zookeepers):
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.ha.automatic-failover.enabled'] = 'true'
     core_site = dc.path('hadoop_conf') / 'core-site.xml'
     with utils.xmlpropmap_edit_in_place(core_site) as props:
         zk_str = ','.join('{host}:{port}'.format(**zk)
                           for zk in zookeepers)
         hookenv.log("Zookeeper string is: %s" % zk_str)
         props['ha.zookeeper.quorum'] = zk_str
     self.hadoop_base.setup_init_script("hdfs", "zkfc")
 def configure_yarn_base(self, host, port, history_http, history_ipc):
     dc = self.hadoop_base.dist_config
     yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
     with utils.xmlpropmap_edit_in_place(yarn_site) as props:
         props['yarn.nodemanager.aux-services'] = 'mapreduce_shuffle'
         props['yarn.nodemanager.vmem-check-enabled'] = 'false'
         if host:
             props['yarn.resourcemanager.hostname'] = '{}'.format(host)
             props['yarn.resourcemanager.address'] = '{}:{}'.format(host, port)
             props["yarn.log.server.url"] = "{}:{}/jobhistory/logs/".format(host, history_http)
     mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
     with utils.xmlpropmap_edit_in_place(mapred_site) as props:
         if host and history_ipc:
             props["mapreduce.jobhistory.address"] = "{}:{}".format(host, history_ipc)
         props["mapreduce.framework.name"] = 'yarn'
    def configure_hive(self, mysql):
        config = hookenv.config()
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
                mysql.host(), mysql.port(), mysql.database())
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
            props['hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(
            hive_env, {
                r'.*export HADOOP_HEAPSIZE *=.*':
                'export HADOOP_HEAPSIZE=%s' % config['heap'],
                r'.*export HIVE_AUX_JARS_PATH *=.*':
                'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
            })

        # Now that we have db connection info, init our schema (only once)
        if not unitdata.kv().get('hive.schema.initialized'):
            utils.run_as('hive', 'schematool', '-initSchema', '-dbType',
                         'mysql')
            unitdata.kv().set('hive.schema.initialized', True)
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    nn_host = get_fqdn()
    hosts = {'namenode': nn_host}
    bigtop.render_site_yaml(hosts=hosts, roles='namenode')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Example #9
    def configure_remote_db(self, mysql):
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        jdbc_url = \
            "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
                mysql.host(), mysql.port(), mysql.database()
            )
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = jdbc_url
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = \
                "com.mysql.jdbc.Driver"

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
        })

        # Now that we have db connection info, init our schema (only once)
        remote_db = hookenv.remote_service_name()
        if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
            tool_path = "{}/bin/schematool".format(
                self.dist_config.path('hive'))
            utils.run_as(
                'ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
            unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
            unitdata.kv().flush(True)
    def setup_hive_config(self):
        '''
        copy the default configuration files to hive_conf property
        defined in dist.yaml
        '''
        default_conf = self.dist_config.path('hive') / 'conf'
        hive_conf = self.dist_config.path('hive_conf')
        hive_conf.rmtree_p()
        default_conf.copytree(hive_conf)

        # Configure immutable bits
        hive_bin = self.dist_config.path('hive') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hive_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hive_bin])
            env['HIVE_CONF_DIR'] = self.dist_config.path('hive_conf')

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') / 'hive-env.sh.template').copy(hive_env)

        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        if not hive_site.exists():
            (self.dist_config.path('hive_conf') / 'hive-default.xml.template').copy(hive_site)
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            # TODO (kwm): we should be able to export java.io.tmpdir so these 4 arent needed
            props['hive.exec.local.scratchdir'] = "/tmp/hive"
            props['hive.downloaded.resources.dir'] = "/tmp/hive_resources"
            props['hive.querylog.location'] = "/tmp/hive"
            props['hive.server2.logging.operation.log.location'] = "/tmp/hive"
            ####

        # create hdfs storage space
        utils.run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p', '/user/hive/warehouse')
    def configure_namenode(self, secondary_host=None, secondary_port=None):
        dc = self.hadoop_base.dist_config
        host = hookenv.local_unit().replace('/', '-')
        port = dc.port('namenode')
        self.configure_hdfs_base(host, port)
        cfg = self.hadoop_base.charm_config
        hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
        with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
            props['dfs.replication'] = cfg['dfs_replication']
            props['dfs.blocksize'] = int(cfg['dfs_blocksize'])
            props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
            props['dfs.namenode.http-address'] = '0.0.0.0:{}'.format(dc.port('nn_webapp_http'))
            # TODO: support SSL
            # props['dfs.namenode.https-address'] = '0.0.0.0:{}'.format(dc.port('nn_webapp_https'))

            # FIXME hack-around until transition to layers is complete
            if not (secondary_host and secondary_port) and helpers:
                unit, secondary = helpers.any_ready_unit('secondary')
                if unit:
                    secondary_host = secondary['hostname']
                    secondary_port = secondary['port']
            if secondary_host and secondary_port:
                props['dfs.secondary.http.address'] = '{host}:{port}'.format(
                    host=secondary_host,
                    port=secondary_port,
                )
Example #12
    def configure_remote_db(self, mysql):
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        jdbc_url = \
            "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
                mysql.host(), mysql.port(), mysql.database()
            )
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = jdbc_url
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = \
                "com.mysql.jdbc.Driver"

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(
            hive_env, {
                r'.*export HIVE_AUX_JARS_PATH *=.*':
                ('export HIVE_AUX_JARS_PATH='
                 '/usr/share/java/mysql-connector-java.jar'),
            })

        # Now that we have db connection info, init our schema (only once)
        remote_db = hookenv.remote_service_name()
        if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
            tool_path = "{}/bin/schematool".format(
                self.dist_config.path('hive'))
            utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType',
                         'mysql')
            unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
            unitdata.kv().flush(True)
 def configure_resourcemanager(self):
     self.configure_yarn_base(*self._local())
     dc = self.hadoop_base.dist_config
     yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
     with utils.xmlpropmap_edit_in_place(yarn_site) as props:
         # 0.0.0.0 will listen on all interfaces, which is what we want on the server
         props['yarn.resourcemanager.webapp.address'] = '0.0.0.0:{}'.format(dc.port('rm_webapp_http'))
Example #14
 def register_journalnodes(self, nodes, port):
     clustername = hookenv.service_name()
     hdfs_site = self.hadoop_base.dist_config.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.namenode.shared.edits.dir'] = 'qjournal://{}/{}'.format(
             ';'.join(['%s:%s' % (host, port) for host in nodes]),
             clustername)
 def configure_hdfs_base(self, clustername, namenodes, port, webhdfs_port):
     dc = self.hadoop_base.dist_config
     core_site = dc.path('hadoop_conf') / 'core-site.xml'
     with utils.xmlpropmap_edit_in_place(core_site) as props:
         props['hadoop.proxyuser.hue.hosts'] = "*"
         props['hadoop.proxyuser.hue.groups'] = "*"
         props['hadoop.proxyuser.oozie.groups'] = '*'
         props['hadoop.proxyuser.oozie.hosts'] = '*'
         if 'lzo' in self.hadoop_base.resources:
             props['io.compression.codecs'] = (
                 'org.apache.hadoop.io.compress.GzipCodec, '
                 'org.apache.hadoop.io.compress.DefaultCodec, '
                 'org.apache.hadoop.io.compress.BZip2Codec, '
                 'org.apache.hadoop.io.compress.SnappyCodec, '
                 'com.hadoop.compression.lzo.LzoCodec, '
                 'com.hadoop.compression.lzo.LzopCodec')
              props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
         else:
             props['io.compression.codecs'] = (
                 'org.apache.hadoop.io.compress.GzipCodec, '
                 'org.apache.hadoop.io.compress.DefaultCodec, '
                 'org.apache.hadoop.io.compress.BZip2Codec, '
                 'org.apache.hadoop.io.compress.SnappyCodec')
         props['fs.defaultFS'] = "hdfs://{clustername}".format(
             clustername=clustername, port=port)
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.webhdfs.enabled'] = "true"
         props['dfs.namenode.name.dir'] = dc.path(
             'hdfs_dir_base') / 'cache/hadoop/dfs/name'
         props['dfs.datanode.data.dir'] = dc.path(
             'hdfs_dir_base') / 'cache/hadoop/dfs/name'
          props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
         props['dfs.nameservices'] = clustername
         props['dfs.client.failover.proxy.provider.%s' % clustername] = \
             'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
         props['dfs.ha.fencing.methods'] = 'sshfence\nshell(/bin/true)'
         props['dfs.ha.fencing.ssh.private-key-files'] = utils.ssh_priv_key(
             'hdfs')
         props['dfs.ha.namenodes.%s' % clustername] = ','.join(namenodes)
         for node in namenodes:
             props['dfs.namenode.rpc-address.%s.%s' %
                   (clustername, node)] = '%s:%s' % (node, port)
             props['dfs.namenode.http-address.%s.%s' %
                   (clustername, node)] = '%s:%s' % (node, webhdfs_port)
 def register_journalnodes(self, nodes, port):
     clustername = hookenv.service_name()
     hdfs_site = self.hadoop_base.dist_config.path(
         'hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.namenode.shared.edits.dir'] = 'qjournal://{}/{}'.format(
             ';'.join(['%s:%s' % (host, port) for host in nodes]),
             clustername)
 def configure_journalnode(self):
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.journalnode.rpc-address'] = '0.0.0.0:{}'.format(
             dc.port('journalnode'))
         props['dfs.journalnode.http-address'] = '0.0.0.0:{}'.format(
             dc.port('jn_webapp_http'))
Example #18
 def configure_datanode(self, clustername, namenodes, port, webhdfs_port):
     self.configure_hdfs_base(clustername, namenodes, port, webhdfs_port)
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.datanode.http.address'] = '0.0.0.0:{}'.format(dc.port('dn_webapp_http'))
     self.hadoop_base.setup_init_script("hdfs", "datanode")
     self.hadoop_base.setup_init_script("hdfs", "journalnode")
 def configure_datanode(self, host=None, port=None):
     if not (host and port):
         host, port = self._remote("datanode")
     self.configure_hdfs_base(host, port)
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.datanode.http.address'] = '0.0.0.0:{}'.format(dc.port('dn_webapp_http'))
 def configure_jobhistory(self):
     self.configure_yarn_base(*self._local())
     dc = self.hadoop_base.dist_config
     mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
     with utils.xmlpropmap_edit_in_place(mapred_site) as props:
         # 0.0.0.0 will listen on all interfaces, which is what we want on the server
         props["mapreduce.jobhistory.address"] = "0.0.0.0:{}".format(dc.port('jobhistory'))
         props["mapreduce.jobhistory.webapp.address"] = "0.0.0.0:{}".format(dc.port('jh_webapp_http'))
 def configure_datanode(self, clustername, namenodes, port, webhdfs_port):
     self.configure_hdfs_base(clustername, namenodes, port, webhdfs_port)
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.datanode.http.address'] = '0.0.0.0:{}'.format(
             dc.port('dn_webapp_http'))
     self.hadoop_base.setup_init_script("hdfs", "datanode")
     self.hadoop_base.setup_init_script("hdfs", "journalnode")
Example #22
 def setup_oozie_config(self):
     # copy default config into alternate dir
     conf_dir = self.dist_config.path('oozie') / 'conf'
     self.dist_config.path('oozie_conf').rmtree_p()
     conf_dir.copytree(self.dist_config.path('oozie_conf'))
     oozie_conf = self.dist_config.path('oozie_conf') / "oozie-site.xml"
     with utils.xmlpropmap_edit_in_place(oozie_conf) as e:
         e['oozie.service.ProxyUserService.proxyuser.hue.hosts'] = '*'
         e['oozie.service.ProxyUserService.proxyuser.hue.groups'] = '*'
Example #23
    def setup_hue(self):
        hue_bin = self.dist_config.path('hue') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hue_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hue_bin])
            env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
            env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
            hadoop_conf = env['HADOOP_CONF_DIR'] + '/core-site.xml'
            yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
            mapred_conf = env['HADOOP_CONF_DIR'] + '/mapred-site.xml'

        with utils.xmlpropmap_edit_in_place(hadoop_conf) as props:
            hdfs_endpoint = props['fs.defaultFS']

        with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
            yarn_log_url = props['yarn.log.server.url'] # 19888
            yarn_resmgr = props['yarn.resourcemanager.address'] # 8032

        with utils.xmlpropmap_edit_in_place(mapred_conf) as props:
            mapred_jobhistory = props['mapreduce.jobhistory.address'] # 10020

        default_conf = self.dist_config.path('hue') / 'desktop/conf'
        hue_conf = self.dist_config.path('hue_conf')
        hue_conf.rmtree_p()
        default_conf.copytree(hue_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        hue_conf.symlink(default_conf)

        hdfs_fulluri = hdfs_endpoint.split('/')[2]
        hdfs_hostname = hdfs_fulluri.split(':')[0]

        hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
        hue_port = self.dist_config.port('hue_web')
        utils.re_edit_in_place(hue_config, {
            r'http_port=8888': 'http_port=%s' % hue_port,
            r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s' % hdfs_endpoint,
            r'## resourcemanager_host=localhost': 'resourcemanager_host=%s' % yarn_resmgr.split(':')[0],
            r'## resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_resmgr.split(':')[1],
            r'## webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://%s:50070/webhdfs/v1' % hdfs_hostname,
            r'## history_server_api_url=http://localhost:19888': 'history_server_api_url=%s' % yarn_log_url.split('/')[0],
            r'## resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://%s:8088' % yarn_resmgr.split(':')[0]
            })
Example #24
 def configure_namenode(self, namenodes):
     dc = self.hadoop_base.dist_config
     clustername = hookenv.service_name()
     host = hookenv.local_unit().replace('/', '-')
     self.configure_hdfs_base(clustername, namenodes, dc.port('namenode'), dc.port('nn_webapp_http'))
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
         props['dfs.namenode.http-address.%s.%s' % (clustername, host)] = '%s:%s' % (host, dc.port('nn_webapp_http'))
     self.hadoop_base.setup_init_script("hdfs", "namenode")
Example #25
    def configure_zeppelin(self):
        '''
        Configure zeppelin environment for all users
        '''
        zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zeppelin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
            env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

        zeppelin_site = self.dist_config.path(
            'zeppelin_conf') / 'zeppelin-site.xml'
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
            xml['zeppelin.notebook.dir'] = self.dist_config.path(
                'zeppelin_notebooks')

        etc_env = utils.read_etc_env()
        hadoop_conf_dir = etc_env.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
        spark_home = etc_env.get('SPARK_HOME', '/usr/lib/spark')
        spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
        zeppelin_env = self.dist_config.path(
            'zeppelin_conf') / 'zeppelin-env.sh'
        with open(zeppelin_env, "a") as f:
            f.write('export ZEPPELIN_HOME={}\n'.format(
                self.dist_config.path('zeppelin')))
            f.write(
                'export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'
                .format(spark_driver_mem, spark_executor_mem))
            f.write('export ZEPPELIN_LOG_DIR={}\n'.format(
                self.dist_config.path('zeppelin_logs')))
            f.write(
                'export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n'
            )
            f.write('export ZEPPELIN_NOTEBOOK_DIR={}\n'.format(
                self.dist_config.path('zeppelin_notebooks')))
            f.write('export SPARK_HOME={}\n'.format(spark_home))
            f.write(
                'export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'
                .format(spark_driver_mem, spark_executor_mem))
            f.write('export HADOOP_CONF_DIR={}\n'.format(hadoop_conf_dir))
            f.write(
                'export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n'
                .format(s=spark_home))
            f.write('export MASTER={}\n'.format(spark_exe_mode))

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of have zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(
            self.dist_config.path('zeppelin_conf'))
        call(cmd.split())
Example #26
 def configure_jobhistory(self):
     self.configure_yarn_base(*self._local())
     dc = self.hadoop_base.dist_config
     mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
     with utils.xmlpropmap_edit_in_place(mapred_site) as props:
         # 0.0.0.0 will listen on all interfaces, which is what we want on the server
         props["mapreduce.jobhistory.address"] = "0.0.0.0:{}".format(dc.port('jobhistory'))
         props["mapreduce.jobhistory.webapp.address"] = "0.0.0.0:{}".format(dc.port('jh_webapp_http'))
         props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
         props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
     self.hadoop_base.setup_init_script(user='******', servicename='historyserver')
 def test_xmlpropmap_edit_in_place(self):
     fd, filename = tempfile.mkstemp()
     os.close(fd)
     tmp_file = Path(filename)
     try:
         tmp_file.write_text(
             '<?xml version="1.0"?>\n'
             '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>\n'
             '\n'
             '<!-- Put site-specific property overrides in this file. -->\n'
             '\n'
             '<configuration>\n'
             '   <property>\n'
             '       <name>modify.me</name>\n'
             '       <value>1</value>\n'
             '       <description>Property to be modified</description>\n'
             '   </property>\n'
             '   <property>\n'
             '       <name>delete.me</name>\n'
             '       <value>None</value>\n'
             '       <description>Property to be removed</description>\n'
             '   </property>\n'
             '   <property>\n'
             '       <name>do.not.modify.me</name>\n'
             '       <value>0</value>\n'
             '       <description>Property to *not* be modified</description>\n'
             '   </property>\n'
             '</configuration>')
         with utils.xmlpropmap_edit_in_place(tmp_file) as props:
             del props['delete.me']
             props['modify.me'] = 'one'
             props['add.me'] = 'NEW'
         self.assertEqual(
             tmp_file.text(),
             '<?xml version="1.0" ?>\n'
             '<configuration>\n'
             '    <property>\n'
             '        <name>modify.me</name>\n'
             '        <value>one</value>\n'
             '        <description>Property to be modified</description>\n'
             '    </property>\n'
             '    <property>\n'
             '        <name>do.not.modify.me</name>\n'
             '        <value>0</value>\n'
             '        <description>Property to *not* be modified</description>\n'
             '    </property>\n'
             '    <property>\n'
             '        <name>add.me</name>\n'
             '        <value>NEW</value>\n'
             '    </property>\n'
             '</configuration>\n')
     finally:
         tmp_file.remove()
 def configure_resourcemanager(self):
     self.configure_yarn_base(*self._local())
     dc = self.hadoop_base.dist_config
     yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
     with utils.xmlpropmap_edit_in_place(yarn_site) as props:
         # 0.0.0.0 will listen on all interfaces, which is what we want on the server
         props['yarn.resourcemanager.webapp.address'] = '0.0.0.0:{}'.format(
             dc.port('rm_webapp_http'))
         # TODO: support SSL
         # props['yarn.resourcemanager.webapp.https.address'] = '0.0.0.0:{}'.format(dc.port('rm_webapp_https'))
     self.hadoop_base.setup_init_script(user='******',
                                        servicename='resourcemanager')
    def trigger_puppet(self):
        # If we can't reverse resolve the hostname (like on azure), support DN
        # registration by IP address.
        # NB: determine this *before* updating /etc/hosts below since
        # gethostbyaddr will not fail if we have an /etc/hosts entry.
        reverse_dns_bad = False
        try:
            socket.gethostbyaddr(utils.resolve_private_address(hookenv.unit_private_ip()))
        except socket.herror:
            reverse_dns_bad = True
        # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
        # java to do a hostname lookup on clouds that have >64 char fqdns
        # (gce). Force short hostname (< 64 char) into /etc/hosts as workaround.
        # Better fix may be to move to java8. See http://paste.ubuntu.com/16230171/
        # NB: do this before the puppet apply, which may call java stuffs
        # like format namenode, which will fail if we dont get this fix
        # down early.
        short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
        private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        if short_host and private_ip:
            utils.update_kv_host(private_ip, short_host)
            utils.manage_etc_hosts()

        charm_dir = hookenv.charm_dir()
        # TODO JIRA KWM: rm does not need Hdfs_init and will fail
        rm_patch = Path(charm_dir) / 'resources/patch1_rm_init_hdfs.patch'
        # TODO JIRA KWM: nm should not *need* mapred role. we could patch it
        # with nm_patch, or adjust nm charm to include mapred role. for now,
        # we're doing the latter. todo rfc from dev@bigtop list.
        # nm_patch = Path(charm_dir) / 'resources/patch2_nm_core-site.patch'
        # TODO JIRA KWM: client role needs common_yarn for yarn-site.xml
        client_patch = Path(charm_dir) / 'resources/patch3_client_role_use_common_yarn.patch'
        with chdir("{}".format(self.bigtop_base)):
            # rm patch goes first
            utils.run_as('root', 'patch', '-p1', '-s', '-i', rm_patch)
            # skip nm_patch for now since nm charm is including mapred role
            # utils.run_as('root', 'patch', '-p1', '-s', '-i', nm_patch)
            # client patch goes last
            utils.run_as('root', 'patch', '-p1', '-s', '-i', client_patch)
        # TODO FIX ABOVE KWM

        # puppet apply needs to be ran where recipes were unpacked
        with chdir("{}".format(self.bigtop_base)):
            utils.run_as('root', 'puppet', 'apply', '-d',
                         '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                         'bigtop-deploy/puppet/manifests/site.pp')

        # Do any post-puppet config on the generated config files.
        if reverse_dns_bad:
            hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
            with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
                props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'false'
 def test_xmlpropmap_edit_in_place(self):
     fd, filename = tempfile.mkstemp()
     os.close(fd)
     tmp_file = Path(filename)
     try:
         tmp_file.write_text(
             '<?xml version="1.0"?>\n'
             '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>\n'
             '\n'
             '<!-- Put site-specific property overrides in this file. -->\n'
             '\n'
             '<configuration>\n'
             '   <property>\n'
             '       <name>modify.me</name>\n'
             '       <value>1</value>\n'
             '       <description>Property to be modified</description>\n'
             '   </property>\n'
             '   <property>\n'
             '       <name>delete.me</name>\n'
             '       <value>None</value>\n'
             '       <description>Property to be removed</description>\n'
             '   </property>\n'
             '   <property>\n'
             '       <name>do.not.modify.me</name>\n'
             '       <value>0</value>\n'
             '       <description>Property to *not* be modified</description>\n'
             '   </property>\n'
             '</configuration>')
         with utils.xmlpropmap_edit_in_place(tmp_file) as props:
             del props['delete.me']
             props['modify.me'] = 'one'
             props['add.me'] = 'NEW'
         self.assertEqual(
             tmp_file.text(), '<?xml version="1.0" ?>\n'
             '<configuration>\n'
             '    <property>\n'
             '        <name>modify.me</name>\n'
             '        <value>one</value>\n'
             '        <description>Property to be modified</description>\n'
             '    </property>\n'
             '    <property>\n'
             '        <name>do.not.modify.me</name>\n'
             '        <value>0</value>\n'
             '        <description>Property to *not* be modified</description>\n'
             '    </property>\n'
             '    <property>\n'
             '        <name>add.me</name>\n'
             '        <value>NEW</value>\n'
             '    </property>\n'
             '</configuration>\n')
     finally:
         tmp_file.remove()
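The two tests above pin down the contract of utils.xmlpropmap_edit_in_place, which every snippet on this page relies on: it is a context manager that exposes the name/value pairs of a Hadoop-style XML configuration file as a dict and writes the file back when the block exits. The helper's own source is not shown here; what follows is only a minimal sketch of that behavior using the standard library, and it does not reproduce details the real implementation handles (pretty-printed output, preserving the description elements of untouched properties).

from contextlib import contextmanager
import xml.etree.ElementTree as ET


@contextmanager
def xmlpropmap_edit_in_place(path):
    # Hypothetical sketch; the real helper used by these charms may differ
    # in formatting and edge-case handling.
    tree = ET.parse(str(path))
    root = tree.getroot()
    props = {
        prop.findtext('name'): prop.findtext('value')
        for prop in root.findall('property')
    }
    yield props  # callers add, modify, or delete entries in this dict
    # Rebuild the <configuration> body from the (possibly modified) dict.
    for prop in root.findall('property'):
        root.remove(prop)
    for name, value in props.items():
        prop = ET.SubElement(root, 'property')
        ET.SubElement(prop, 'name').text = name
        ET.SubElement(prop, 'value').text = str(value)
    tree.write(str(path), encoding='utf-8', xml_declaration=True)

Deleting a key from the yielded dict drops the property, and new keys become new property elements, which matches what the tests assert.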
Example #31
 def configure_yarn_base(self, host, port, history_http, history_ipc):
     dc = self.hadoop_base.dist_config
     yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
     with utils.xmlpropmap_edit_in_place(yarn_site) as props:
         props['yarn.nodemanager.aux-services'] = 'mapreduce_shuffle'
         props['yarn.nodemanager.vmem-check-enabled'] = 'false'
         if host:
             props['yarn.resourcemanager.hostname'] = '{}'.format(host)
             props['yarn.resourcemanager.address'] = '{}:{}'.format(host, port)
             props["yarn.log.server.url"] = "{}:{}/jobhistory/logs/".format(host, history_http)
     mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
     with utils.xmlpropmap_edit_in_place(mapred_site) as props:
         if host and history_ipc:
             props["mapreduce.jobhistory.address"] = "{}:{}".format(host, history_ipc)
         if host and history_http:
             props["mapreduce.jobhistory.webapp.address"] = "{}:{}".format(host, history_http)
         props["mapreduce.framework.name"] = 'yarn'
         props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
         props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
         props["mapreduce.map.output.compress"] = 'true'
         props["mapred.map.output.compress.codec"] = 'org.apache.hadoop.io.compress.SnappyCodec'
         props["mapreduce.application.classpath"] = "$HADOOP_HOME/share/hadoop/mapreduce/*,\
 def configure_hdfs_base(self, host, port):
     dc = self.hadoop_base.dist_config
     core_site = dc.path('hadoop_conf') / 'core-site.xml'
     with utils.xmlpropmap_edit_in_place(core_site) as props:
         if host and port:
             props['fs.defaultFS'] = "hdfs://{host}:{port}".format(host=host, port=port)
         props['hadoop.proxyuser.hue.hosts'] = "*"
         props['hadoop.proxyuser.hue.groups'] = "*"
         props['hadoop.proxyuser.oozie.groups'] = '*'
         props['hadoop.proxyuser.oozie.hosts'] = '*'
         lzo_installed = unitdata.kv().get('hadoop.lzo.installed')
         lzo_enabled = hookenv.config().get('compression') == 'lzo'
         if lzo_installed and lzo_enabled:
             props['io.compression.codecs'] = ('com.hadoop.compression.lzo.LzoCodec, '
                                               'com.hadoop.compression.lzo.LzopCodec')
             props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.webhdfs.enabled'] = "true"
         props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
         props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
         props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
Example #33
    def configure_local_db(self):
        local_url = 'jdbc:derby:;databaseName=/var/lib/hive/metastore/metastore_db;create=true'
        local_driver = 'org.apache.derby.jdbc.EmbeddedDriver'
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = local_url
            props['javax.jdo.option.ConnectionUserName'] = '******'
            props['javax.jdo.option.ConnectionPassword'] = '******'
            props['javax.jdo.option.ConnectionDriverName'] = local_driver

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HIVE_AUX_JARS_PATH *=.*': '# export HIVE_AUX_JARS_PATH=',
        })
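Several examples also patch flat configuration files (hive-env.sh, hue.ini) with utils.re_edit_in_place, whose source is likewise not included on this page. Judging from how it is called, it takes a mapping of regular expressions to replacement text; the following is a rough sketch under that assumption, and the real helper may differ (for example in encoding handling or in how it applies the substitutions).

import re
from pathlib import Path


def re_edit_in_place(filename, subs):
    # Hypothetical sketch of the helper used above: apply each
    # {pattern: replacement} pair to every line of the file, in place.
    path = Path(filename)
    lines = path.read_text().splitlines(keepends=True)
    with path.open('w') as f:
        for line in lines:
            for pattern, replacement in subs.items():
                line = re.sub(pattern, replacement, line)
            f.write(line)

Because the whole regex match is replaced, patterns such as r'.*resourcemanager_host=localhost' rewrite, and effectively uncomment, the entire matching line.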
Example #34
 def configure_hdfs_base(self, clustername, namenodes, port, webhdfs_port):
     dc = self.hadoop_base.dist_config
     core_site = dc.path('hadoop_conf') / 'core-site.xml'
     with utils.xmlpropmap_edit_in_place(core_site) as props:
         props['hadoop.proxyuser.hue.hosts'] = "*"
         props['hadoop.proxyuser.hue.groups'] = "*"
         props['hadoop.proxyuser.oozie.groups'] = '*'
         props['hadoop.proxyuser.oozie.hosts'] = '*'
         if 'lzo' in self.hadoop_base.resources:
             props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                               'org.apache.hadoop.io.compress.DefaultCodec, '
                                               'org.apache.hadoop.io.compress.BZip2Codec, '
                                               'org.apache.hadoop.io.compress.SnappyCodec, '
                                               'com.hadoop.compression.lzo.LzoCodec, '
                                               'com.hadoop.compression.lzo.LzopCodec')
             props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
         else:
             props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                               'org.apache.hadoop.io.compress.DefaultCodec, '
                                               'org.apache.hadoop.io.compress.BZip2Codec, '
                                               'org.apache.hadoop.io.compress.SnappyCodec')
         props['fs.defaultFS'] = "hdfs://{clustername}".format(clustername=clustername, port=port)
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.webhdfs.enabled'] = "true"
         props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
         props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
         props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
         props['dfs.nameservices'] = clustername
         props['dfs.client.failover.proxy.provider.%s' % clustername] = \
             'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
         props['dfs.ha.fencing.methods'] = 'sshfence\nshell(/bin/true)'
         props['dfs.ha.fencing.ssh.private-key-files'] = utils.ssh_priv_key('hdfs')
         props['dfs.ha.namenodes.%s' % clustername] = ','.join(namenodes)
         for node in namenodes:
             props['dfs.namenode.rpc-address.%s.%s' % (clustername, node)] = '%s:%s' % (node, port)
             props['dfs.namenode.http-address.%s.%s' % (clustername, node)] = '%s:%s' % (node, webhdfs_port)
 def configure_jobhistory(self):
     self.configure_yarn_base(*self._local())
     dc = self.hadoop_base.dist_config
     mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
     with utils.xmlpropmap_edit_in_place(mapred_site) as props:
         # 0.0.0.0 will listen on all interfaces, which is what we want on the server
         props["mapreduce.jobhistory.address"] = "0.0.0.0:{}".format(
             dc.port('jobhistory'))
         props["mapreduce.jobhistory.webapp.address"] = "0.0.0.0:{}".format(
             dc.port('jh_webapp_http'))
          props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
         props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
     self.hadoop_base.setup_init_script(user='******',
                                        servicename='historyserver')
    def configure_zeppelin(self):
        """
        Configure zeppelin environment for all users
        """
        zeppelin_bin = self.dist_config.path("zeppelin") / "bin"
        with utils.environment_edit_in_place("/etc/environment") as env:
            if zeppelin_bin not in env["PATH"]:
                env["PATH"] = ":".join([env["PATH"], zeppelin_bin])
            env["ZEPPELIN_CONF_DIR"] = self.dist_config.path("zeppelin_conf")

        zeppelin_site = self.dist_config.path("zeppelin_conf") / "zeppelin-site.xml"
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml["zeppelin.server.port"] = self.dist_config.port("zeppelin")
            xml["zeppelin.notebook.dir"] = self.dist_config.path("zeppelin_notebooks")

        etc_env = utils.read_etc_env()
        hadoop_conf_dir = etc_env.get("HADOOP_CONF_DIR", "/etc/hadoop/conf")
        spark_home = etc_env.get("SPARK_HOME", "/usr/lib/spark")
        spark_driver_mem = etc_env.get("SPARK_DRIVER_MEMORY", "1g")
        spark_exe_mode = os.environ.get("MASTER", "yarn-client")
        spark_executor_mem = etc_env.get("SPARK_EXECUTOR_MEMORY", "1g")
        zeppelin_env = self.dist_config.path("zeppelin_conf") / "zeppelin-env.sh"
        with open(zeppelin_env, "a") as f:
            f.write("export ZEPPELIN_HOME={}\n".format(self.dist_config.path("zeppelin")))
            f.write(
                'export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
                    spark_driver_mem, spark_executor_mem
                )
            )
            f.write("export ZEPPELIN_LOG_DIR={}\n".format(self.dist_config.path("zeppelin_logs")))
            f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
            f.write("export ZEPPELIN_NOTEBOOK_DIR={}\n".format(self.dist_config.path("zeppelin_notebooks")))
            f.write("export SPARK_HOME={}\n".format(spark_home))
            f.write(
                'export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
                    spark_driver_mem, spark_executor_mem
                )
            )
            f.write("export HADOOP_CONF_DIR={}\n".format(hadoop_conf_dir))
            f.write("export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n".format(s=spark_home))
            f.write("export MASTER={}\n".format(spark_exe_mode))

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of have zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path("zeppelin_conf"))
        call(cmd.split())
    def configure_zeppelin(self):
        '''
        Configure zeppelin environment for all users
        '''
        zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zeppelin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
            env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

        zeppelin_site = self.dist_config.path('zeppelin_conf') / 'zeppelin-site.xml'
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
            xml['zeppelin.notebook.dir'] = self.dist_config.path('zeppelin_notebooks')

        etc_env = utils.read_etc_env()
        hadoop_conf_dir = etc_env.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
        hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
        spark_home = etc_env.get('SPARK_HOME', '/usr/lib/spark')
        spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
        zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
        with open(zeppelin_env, "a") as f:
            f.write('export ZEPPELIN_CLASSPATH_OVERRIDES={}\n'.format(hadoop_extra_classpath))
            f.write('export ZEPPELIN_HOME={}\n'.format(self.dist_config.path('zeppelin')))
            f.write('export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
                spark_driver_mem,
                spark_executor_mem))
            f.write('export ZEPPELIN_LOG_DIR={}\n'.format(self.dist_config.path('zeppelin_logs')))
            f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
            f.write('export ZEPPELIN_NOTEBOOK_DIR={}\n'.format(self.dist_config.path('zeppelin_notebooks')))
            f.write('export SPARK_HOME={}\n'.format(spark_home))
            f.write('export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
                spark_driver_mem,
                spark_executor_mem))
            f.write('export HADOOP_CONF_DIR={}\n'.format(hadoop_conf_dir))
            f.write('export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n'.format(s=spark_home))
            f.write('export MASTER={}\n'.format(spark_exe_mode))

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of have zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path('zeppelin_conf'))
        call(cmd.split())
Example #38
    def configure_local_db(self):
        local_url = \
            ('jdbc:derby:;databaseName='
             '/var/lib/hive/metastore/metastore_db;create=true')
        local_driver = 'org.apache.derby.jdbc.EmbeddedDriver'
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = local_url
            props['javax.jdo.option.ConnectionUserName'] = '******'
            props['javax.jdo.option.ConnectionPassword'] = '******'
            props['javax.jdo.option.ConnectionDriverName'] = local_driver

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HIVE_AUX_JARS_PATH *=.*':
            '# export HIVE_AUX_JARS_PATH=',
        })
 def configure_namenode(self, namenodes):
     dc = self.hadoop_base.dist_config
     clustername = hookenv.service_name()
     host = hookenv.local_unit().replace('/', '-')
     self.configure_hdfs_base(clustername, namenodes, dc.port('namenode'),
                              dc.port('nn_webapp_http'))
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
          props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
          props['dfs.namenode.http-address.%s.%s' % (clustername, host)] = '%s:%s' % (host, dc.port('nn_webapp_http'))
         props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
         props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
         props['dfs.namenode.http-bind-host'] = '0.0.0.0'
         props['dfs.namenode.https-bind-host'] = '0.0.0.0'
     self.hadoop_base.setup_init_script("hdfs", "namenode")
Example #40
    def setup_hue(self, namenodes, resourcemanagers, hdfs_port, yarn_port):
        hookenv.status_set('maintenance', 'Setting up Hue')
        hue_bin = self.dist_config.path('hue') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hue_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hue_bin])
            env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
            env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
            yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
        with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
            yarn_log_url = props['yarn.log.server.url'] # 19888
            yarn_resmgr = props['yarn.resourcemanager.address'] # 8032

        default_conf = self.dist_config.path('hue') / 'desktop/conf'
        hue_conf = self.dist_config.path('hue_conf')

        if os.path.islink('/usr/lib/hue/desktop/conf'):
            return
        else:
            hue_conf.rmtree_p()
            default_conf.copytree(hue_conf)
            # Now remove the conf included in the tarball and symlink our real conf
            default_conf.rmtree_p()
            hue_conf.symlink(default_conf)

        hue_port = self.dist_config.port('hue_web')

        # Fix following for HA: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_hadoop-ha/content/ha-nn-deploy-hue.html
        hookenv.log("Not currently supporting HA, FIX: namenodes are: " + str(namenodes) + " resmanagers: " + str(resourcemanagers))
        utils.re_edit_in_place(self.hue_config, {
            r'http_port=8888': 'http_port={}'.format(hue_port),
            r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs={}:{}'.format(namenodes[0], hdfs_port),
            r'.*resourcemanager_host=localhost': 'resourcemanager_host={}'.format(resourcemanagers[0]),
            r'.*resourcemanager_port=8032': 'resourcemanager_port={}'.format(yarn_port),
            r'.*webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://{}:50070/webhdfs/v1'.format(namenodes[0]),
            r'.*history_server_api_url=http://localhost:19888': 'history_server_api_url={}'.format(yarn_log_url.split('/')[0]),
            r'.*resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://{}:8088'.format(yarn_resmgr.split(':')[0]),
            r'.*secret_key=.*': 'secret_key={}'.format(uuid.uuid4())
            })

        self.update_apps()
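    # The Hue settings above are rewritten with utils.re_edit_in_place. As a
    # hedged, hypothetical sketch (not the actual jujubigdata code), a helper
    # with the same regex line-substitution semantics might look like:
    #
    #     import re
    #
    #     def re_edit_in_place(filename, subs, add_if_not_found=False):
    #         # subs maps a regex (matched against each line) to its new line.
    #         with open(str(filename)) as f:
    #             lines = f.read().splitlines()
    #         matched = set()
    #         for i, line in enumerate(lines):
    #             for pattern, replacement in subs.items():
    #                 if re.match(pattern, line):
    #                     lines[i] = replacement
    #                     matched.add(pattern)
    #         if add_if_not_found:
    #             # Append replacements whose pattern never matched any line.
    #             lines.extend(r for p, r in subs.items() if p not in matched)
    #         with open(str(filename), 'w') as f:
    #             f.write('\n'.join(lines) + '\n')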
    def configure_hive(self, mysql):
        config = hookenv.config()
        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            props['javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
                mysql.host(), mysql.port(), mysql.database()
            )
            props['javax.jdo.option.ConnectionUserName'] = mysql.user()
            props['javax.jdo.option.ConnectionPassword'] = mysql.password()
            props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
            props['hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        utils.re_edit_in_place(hive_env, {
            r'.*export HADOOP_HEAPSIZE *=.*': 'export HADOOP_HEAPSIZE=%s' % config['heap'],
            r'.*export HIVE_AUX_JARS_PATH *=.*': 'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
        })

        # Now that we have db connection info, init our schema (only once)
        if not unitdata.kv().get('hive.schema.initialized'):
            utils.run_as('hive', 'schematool', '-initSchema', '-dbType', 'mysql')
            unitdata.kv().set('hive.schema.initialized', True)
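    # utils.run_as (used above for schematool, and elsewhere for hdfs dfs)
    # runs a command as another system user. Hypothetical sketch only; the
    # real helper likely handles environment and error reporting differently:
    #
    #     from subprocess import check_call
    #
    #     def run_as(user, *command):
    #         # Delegate to su so the command runs under the target user.
    #         check_call(['su', user, '-c', ' '.join(command)])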
    def configure_zeppelin(self):
        '''
        Configure zeppelin environment for all users
        '''
        zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if zeppelin_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
            env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

        zeppelin_site = self.dist_config.path('zeppelin_conf') / 'zeppelin-site.xml'
        with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
            xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
            xml['zeppelin.websocket.port'] = self.dist_config.port('zeppelin_web')
            xml['zeppelin.notebook.dir'] = self.dist_config.path('zeppelin_notebooks')

        hadoop_conf_dir = os.environ.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
        spark_home = os.environ.get('SPARK_HOME', '/usr/lib/spark')
        spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
        zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
        utils.re_edit_in_place(zeppelin_env, {
            r'.*export ZEPPELIN_HOME.*': 'export ZEPPELIN_HOME={}'.format(self.dist_config.path('zeppelin')),
            r'.*export ZEPPELIN_LOG_DIR.*': 'export ZEPPELIN_LOG_DIR={}'.format(self.dist_config.path('zeppelin_logs')),
            r'.*export ZEPPELIN_NOTEBOOK_DIR.*': 'export ZEPPELIN_NOTEBOOK_DIR={}'.format(self.dist_config.path('zeppelin_notebooks')),
            r'.*export SPARK_HOME.*': 'export SPARK_HOME={}'.format(spark_home),
            r'.*export HADOOP_CONF_DIR.*': 'export HADOOP_CONF_DIR={}'.format(hadoop_conf_dir),
            r'.*export PYTHONPATH.*': 'export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip'.format(
                s=spark_home),
            r'.*export MASTER.*': 'export MASTER={}'.format(spark_exe_mode),
            r'.*export SPARK_YARN_USER_ENV.*': 'export SPARK_YARN_USER_ENV="PYTHONPATH=${PYTHONPATH}"',
        }, add_if_not_found=True)

        # User needs write access to zepp's conf to write interpreter.json
        # on server start. chown the whole conf dir, though we could probably
        # touch that file and chown it, leaving the rest owned as root:root.
        # TODO: weigh implications of having zepp's conf dir owned by non-root.
        cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path('zeppelin_conf'))
        call(cmd.split())
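    # utils.environment_edit_in_place (used here and in setup_hue above)
    # exposes /etc/environment as an editable mapping. Hypothetical sketch
    # only; quoting and ordering in the real jujubigdata helper may differ:
    #
    #     from contextlib import contextmanager
    #
    #     @contextmanager
    #     def environment_edit_in_place(path='/etc/environment'):
    #         env = {}
    #         with open(path) as f:
    #             for line in f:
    #                 line = line.strip()
    #                 if line and not line.startswith('#') and '=' in line:
    #                     key, _, value = line.partition('=')
    #                     env[key] = value.strip('"')
    #         yield env
    #         with open(path, 'w') as f:
    #             for key, value in env.items():
    #                 f.write('{}="{}"\n'.format(key, value))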
    def setup_hive_config(self):
        '''
        Copy the default configuration files to the hive_conf path
        defined in dist.yaml.
        '''
        default_conf = self.dist_config.path('hive') / 'conf'
        hive_conf = self.dist_config.path('hive_conf')
        hive_conf.rmtree_p()
        default_conf.copytree(hive_conf)

        # Configure immutable bits
        hive_bin = self.dist_config.path('hive') / 'bin'
        with utils.environment_edit_in_place('/etc/environment') as env:
            if hive_bin not in env['PATH']:
                env['PATH'] = ':'.join([env['PATH'], hive_bin])
            env['HIVE_CONF_DIR'] = self.dist_config.path('hive_conf')

        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') /
             'hive-env.sh.template').copy(hive_env)

        hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
        if not hive_site.exists():
            (self.dist_config.path('hive_conf') /
             'hive-default.xml.template').copy(hive_site)
        with utils.xmlpropmap_edit_in_place(hive_site) as props:
            # TODO (kwm): we should be able to export java.io.tmpdir so these 4 aren't needed
            props['hive.exec.local.scratchdir'] = "/tmp/hive"
            props['hive.downloaded.resources.dir'] = "/tmp/hive_resources"
            props['hive.querylog.location'] = "/tmp/hive"
            props['hive.server2.logging.operation.log.location'] = "/tmp/hive"
            ####

        # create hdfs storage space
        utils.run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p',
                     '/user/hive/warehouse')
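The dist_config object threaded through all of these methods resolves directory and port keys from a charm-level dist.yaml. As a hypothetical stand-in (the real jujubigdata DistConfig is richer), the two lookups used in these examples behave roughly like:

# Hypothetical stand-in; key names and dist.yaml layout are assumptions.
from path import Path  # path.py, the same Path type the examples rely on

class SimpleDistConfig(object):
    def __init__(self, dirs, ports):
        self._dirs = dirs     # e.g. {'hive_conf': '/etc/hive/conf'}
        self._ports = ports   # e.g. {'hue_web': 8888}

    def path(self, key):
        # Returns a Path, so callers can use the '/' join operator.
        return Path(self._dirs[key])

    def port(self, key):
        return self._ports[key]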
Example #44
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={
            'namenode': get_fqdn(),
        },
        roles=[
            'namenode',
            'mapred-app',
        ],
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (e.g. dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
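A quick way to confirm that the bind-host overrides above actually landed in hdfs-site.xml is to read the file back. The following stand-alone check uses only the standard library; the property name and path are the ones from the example above:

import xml.etree.ElementTree as ET

def hdfs_property(name, path='/etc/hadoop/conf/hdfs-site.xml'):
    # Return the value of one property from a Hadoop-style site file.
    root = ET.parse(path).getroot()
    for prop in root.findall('property'):
        if prop.find('name').text == name:
            return prop.find('value').text
    return None

assert hdfs_property('dfs.namenode.rpc-bind-host') == '0.0.0.0'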
Example #45
 def build_oozie_sharelib(self):
     core_conf = self.dist_config.path('hadoop_conf') / "core-site.xml"
     with utils.xmlpropmap_edit_in_place(core_conf) as e:
         namenodeURL = e['fs.defaultFS']
     slib = '/usr/lib/oozie/'
     utils.run_as('oozie', 'oozie-setup.sh', 'sharelib', 'create', '-fs', namenodeURL, '-locallib', slib)
def reconfigure_hdfs():
    cfg = hookenv.config()
    hdfs_site = get_dist_config().path('hadoop_conf') / 'hdfs-site.xml'
    with xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.replication'] = cfg['dfs_replication']
        props['dfs.blocksize'] = int(cfg['dfs_blocksize'])
Example #47
 def configure_journalnode(self):
     dc = self.hadoop_base.dist_config
     hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
     with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
         props['dfs.journalnode.rpc-address'] = '0.0.0.0:{}'.format(dc.port('journalnode'))
         props['dfs.journalnode.http-address'] = '0.0.0.0:{}'.format(dc.port('jn_webapp_http'))