Ejemplo n.º 1
0
    def _write_mapred_site(self):
        """Obtain MapReduce ports and write ``conf/mapred-site.xml``.

        Four ports returned by ``find_unused_port()`` are stored on the
        instance (``_jt_thrift_port``, ``_jt_http_port``, ``_jt_port``,
        ``_tt_http_port``). The path of the generated file is stored in
        ``self._mapred_site`` and handed to ``write_config``.
        """
        self._jt_thrift_port = find_unused_port()
        self._jt_http_port = find_unused_port()
        self._jt_port = find_unused_port()
        self._tt_http_port = find_unused_port()

        def endpoint(port):
            # Every address shares the host part taken from self._fqdn.
            return '%s:%s' % (self._fqdn, port)

        mapred_configs = {
            'mapred.job.tracker': endpoint(self._jt_port),
            'mapred.job.tracker.http.address': endpoint(self._jt_http_port),
            'jobtracker.thrift.address': endpoint(self._jt_thrift_port),
            'mapred.jobtracker.plugins':
            'org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin',
            'mapred.task.tracker.http.address': endpoint(self._tt_http_port),
        }

        # Build the path once and pass the stored value on, so the recorded
        # location and the file actually written cannot drift apart.
        self._mapred_site = self._tmppath('conf/mapred-site.xml')
        write_config(mapred_configs, self._mapred_site)
Ejemplo n.º 2
0
    def _write_mapred_site(self):
        """Obtain job-history/shuffle ports and write ``conf/mapred-site.xml``.

        Stores three ports from ``find_unused_port()`` on the instance
        (``_jh_port``, ``_jh_web_port``, ``_mr_shuffle_port``) and the path of
        the generated file in ``self._mapred_site``.

        Reads ``self._rm_port``, which this method does not set; it must
        already exist on the instance (presumably assigned by
        ``_write_yarn_site`` -- verify the call order in the caller).
        """
        self._jh_port = find_unused_port()
        self._jh_web_port = find_unused_port()
        self._mr_shuffle_port = find_unused_port()

        def endpoint(port):
            # Every address shares the host part taken from self._fqdn.
            return '%s:%s' % (self._fqdn, port)

        mapred_configs = {
            'mapred.job.tracker': endpoint(self._rm_port),
            'mapreduce.framework.name': 'yarn',
            'mapreduce.jobhistory.address': endpoint(self._jh_port),
            'mapreduce.jobhistory.webapp.address': endpoint(self._jh_web_port),
            'mapreduce.task.tmp.dir': self._tmppath('tasks'),
            # Passed through as the raw port value, not as host:port.
            'mapreduce.shuffle.port': self._mr_shuffle_port,
        }

        # Build the path once and pass the stored value on, so the recorded
        # location and the file actually written cannot drift apart.
        self._mapred_site = self._tmppath('conf/mapred-site.xml')
        write_config(mapred_configs, self._mapred_site)
Ejemplo n.º 3
0
  def _write_mapred_site(self):
    """Write ``conf/mapred-site.xml`` using freshly obtained ports.

    The four ports returned by ``find_unused_port()`` are kept on the instance
    as ``_jt_thrift_port``, ``_jt_http_port``, ``_jt_port`` and
    ``_tt_http_port``.
    """
    self._jt_thrift_port = find_unused_port()
    self._jt_http_port = find_unused_port()
    self._jt_port = find_unused_port()
    self._tt_http_port = find_unused_port()

    # Shared host:port template for every address entry below.
    host_port = '%s:%s'

    settings = {}
    settings['mapred.job.tracker'] = host_port % (self._fqdn, self._jt_port,)
    settings['mapred.job.tracker.http.address'] = host_port % (self._fqdn, self._jt_http_port,)
    settings['jobtracker.thrift.address'] = host_port % (self._fqdn, self._jt_thrift_port,)
    settings['mapred.jobtracker.plugins'] = 'org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin'
    settings['mapred.task.tracker.http.address'] = host_port % (self._fqdn, self._tt_http_port,)

    target = self._tmppath('conf/mapred-site.xml')
    write_config(settings, target)
Ejemplo n.º 4
0
  def _write_mapred_site(self):
    """Obtain job-history/shuffle ports and write ``conf/mapred-site.xml``.

    Stores three ports from ``find_unused_port()`` on the instance
    (``_jh_port``, ``_jh_web_port``, ``_mr_shuffle_port``) and the path of the
    generated file in ``self._mapred_site``.

    Reads ``self._rm_port``, which this method does not set; it must already
    exist on the instance (presumably assigned by ``_write_yarn_site`` --
    verify the call order in the caller).
    """
    self._jh_port = find_unused_port()
    self._jh_web_port = find_unused_port()
    self._mr_shuffle_port = find_unused_port()

    def endpoint(port):
      # Every address shares the host part taken from self._fqdn.
      return '%s:%s' % (self._fqdn, port)

    mapred_configs = {
      'mapred.job.tracker': endpoint(self._rm_port),
      'mapreduce.framework.name': 'yarn',
      'mapreduce.jobhistory.address': endpoint(self._jh_port),
      'mapreduce.jobhistory.webapp.address': endpoint(self._jh_web_port),
      'mapreduce.task.tmp.dir': self._tmppath('tasks'),
      # Passed through as the raw port value, not as host:port.
      'mapreduce.shuffle.port': self._mr_shuffle_port,
    }

    # Build the path once and pass the stored value on, so the recorded
    # location and the file actually written cannot drift apart.
    self._mapred_site = self._tmppath('conf/mapred-site.xml')
    write_config(mapred_configs, self._mapred_site)
Ejemplo n.º 5
0
    def _write_mapred_site(self):
        """Obtain job-history/shuffle ports and write ``conf/mapred-site.xml``.

        Stores three ports from ``find_unused_port()`` on the instance
        (``_jh_port``, ``_jh_web_port``, ``_mr_shuffle_port``) and the path of
        the generated file in ``self._mapred_site``.

        Reads ``self._rm_port``, which this method does not set; it must
        already exist on the instance (presumably assigned by
        ``_write_yarn_site`` -- verify the call order in the caller).
        """
        self._jh_port = find_unused_port()
        self._jh_web_port = find_unused_port()
        self._mr_shuffle_port = find_unused_port()

        def endpoint(port):
            # Every address shares the host part taken from self._fqdn.
            return "%s:%s" % (self._fqdn, port)

        mapred_configs = {
            "mapred.job.tracker": endpoint(self._rm_port),
            "mapreduce.framework.name": "yarn",
            "mapreduce.jobhistory.address": endpoint(self._jh_port),
            "mapreduce.jobhistory.webapp.address": endpoint(self._jh_web_port),
            "mapreduce.task.tmp.dir": self._tmppath("tasks"),
            # Passed through as the raw port value, not as host:port.
            "mapreduce.shuffle.port": self._mr_shuffle_port,
        }

        # Build the path once and pass the stored value on, so the recorded
        # location and the file actually written cannot drift apart.
        self._mapred_site = self._tmppath("conf/mapred-site.xml")
        write_config(mapred_configs, self._mapred_site)
Ejemplo n.º 6
0
    def _write_yarn_site(self):
        """Obtain YARN ports and write ``conf/yarn-site.xml``.

        Ports returned by ``find_unused_port()`` are stored on the instance
        (``_rm_resource_port``, ``_rm_port``, ``_rm_scheduler_port``,
        ``_rm_admin_port``, ``_rm_webapp_port``, ``_nm_port``,
        ``_nm_webapp_port``). The path of the generated file is stored in
        ``self._yarn_site`` and handed to ``write_config``.
        """
        self._rm_resource_port = find_unused_port()
        self._rm_port = find_unused_port()
        self._rm_scheduler_port = find_unused_port()
        self._rm_admin_port = find_unused_port()
        self._rm_webapp_port = find_unused_port()
        self._nm_port = find_unused_port()
        self._nm_webapp_port = find_unused_port()

        def endpoint(port):
            # Every address shares the host part taken from self._fqdn.
            return "%s:%s" % (self._fqdn, port)

        yarn_configs = {
            "yarn.resourcemanager.resource-tracker.address": endpoint(self._rm_resource_port),
            "yarn.resourcemanager.address": endpoint(self._rm_port),
            # /!\ Hardcoded to 8030 for now: self._rm_scheduler_port is
            # obtained above but is not used for this entry.
            "yarn.resourcemanager.scheduler.address": endpoint(8030),
            "yarn.resourcemanager.scheduler.class": "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler",
            "yarn.resourcemanager.admin.address": endpoint(self._rm_admin_port),
            "yarn.resourcemanager.webapp.address": endpoint(self._rm_webapp_port),
            "yarn.log-aggregation-enable": "true",
            "yarn.dispatcher.exit-on-error": "true",
            "yarn.nodemanager.local-dirs": self._local_dir,
            "yarn.nodemanager.log-dirs": self._logpath("yarn-logs"),
            "yarn.nodemanager.remote-app-log-dir": "/var/log/hadoop-yarn/apps",
            "yarn.nodemanager.localizer.address": endpoint(self._nm_port),
            # NOTE(review): newer Hadoop releases appear to name this service
            # "mapreduce_shuffle" -- confirm which release this targets.
            "yarn.nodemanager.aux-services": "mapreduce.shuffle",
            "yarn.nodemanager.aux-services.mapreduce.shuffle.class": "org.apache.hadoop.mapred.ShuffleHandler",
            # NOTE(review): bare port with no host part, unlike the other
            # address entries -- confirm this is intended.
            "yarn.nodemanager.webapp.address": self._nm_webapp_port,
            "yarn.app.mapreduce.am.staging-dir": "/tmp/hadoop-yarn/staging",
            "yarn.application.classpath": """$HADOOP_CONF_DIR,
        $HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
        $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
        $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,
        $HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*""",
        }

        # Build the path once and pass the stored value on, so the recorded
        # location and the file actually written cannot drift apart.
        self._yarn_site = self._tmppath("conf/yarn-site.xml")
        write_config(yarn_configs, self._yarn_site)
Ejemplo n.º 7
0
  def _write_core_site(self):
    """Generate the group-mapping file and write ``conf/core-site.xml``.

    Sets ``self._namenode_port`` (from ``find_unused_port()``) and
    ``self._fs_default_name`` (an ``hdfs://localhost:<port>`` URI).
    """
    # The static user/group mapping file is produced before anything else.
    mapping_file = self._tmppath('ugm.properties')
    self._write_static_group_mapping(mapping_file)

    port = find_unused_port()
    self._namenode_port = port
    self._fs_default_name = 'hdfs://localhost:%s' % (port,)

    settings = {}
    settings['fs.default.name'] = self._fs_default_name
    settings['hadoop.security.authorization'] = 'true'
    settings['hadoop.security.authentication'] = 'simple'
    # Proxy-user entries are keyed by the configured superuser name.
    settings['hadoop.proxyuser.%s.groups' % (self.superuser,)] = 'users,supergroup'
    settings['hadoop.proxyuser.%s.hosts' % (self.superuser,)] = 'localhost'
    settings['hadoop.tmp.dir'] = self._tmppath('hadoop_tmp_dir')

    write_config(settings, self._tmppath('conf/core-site.xml'))
Ejemplo n.º 8
0
  def _write_hdfs_site(self):
    """Write ``conf/hdfs-site.xml`` for a single-replica setup.

    Sets ``self._dfs_http_port`` (from ``find_unused_port()``) and
    ``self._dfs_http_address`` (``<fqdn>:<port>``).
    """
    http_port = find_unused_port()
    self._dfs_http_port = http_port
    fqdn = self._fqdn
    self._dfs_http_address = '%s:%s' % (fqdn, http_port)

    # Datanode endpoints use port 0 on this host.
    host_port_zero = '%s:0' % fqdn

    entries = [
      ('dfs.webhdfs.enabled', 'true'),
      ('dfs.http.address', self._dfs_http_address),
      ('dfs.namenode.safemode.extension', 1),
      ('dfs.namenode.safemode.threshold-pct', 0),
      ('dfs.datanode.address', host_port_zero),
      # Work around webhdfs redirect bug -- bind to all interfaces
      ('dfs.datanode.http.address', '0.0.0.0:0'),
      ('dfs.datanode.ipc.address', host_port_zero),
      ('dfs.replication', 1),
      ('dfs.safemode.min.datanodes', 1),
    ]
    write_config(dict(entries), self._tmppath('conf/hdfs-site.xml'))
Ejemplo n.º 9
0
  def _write_hdfs_site(self):
    """Write ``conf/hdfs-site.xml`` with every endpoint on ``localhost``.

    Sets ``self._dfs_http_port`` (from ``find_unused_port()``) and
    ``self._dfs_http_address`` (``localhost:<port>``).
    """
    http_port = find_unused_port()
    self._dfs_http_port = http_port
    self._dfs_http_address = 'localhost:%s' % (http_port,)

    settings = {}
    settings['dfs.webhdfs.enabled'] = 'true'
    settings['dfs.http.address'] = self._dfs_http_address
    settings['dfs.namenode.safemode.extension'] = 1
    settings['dfs.namenode.safemode.threshold-pct'] = 0
    settings['dfs.datanode.address'] = 'localhost:0'
    # Work around webhdfs redirect bug -- bind to all interfaces
    settings['dfs.datanode.http.address'] = '0.0.0.0:0'
    settings['dfs.datanode.ipc.address'] = 'localhost:0'
    settings['dfs.replication'] = 1
    settings['dfs.safemode.min.datanodes'] = 1

    destination = self._tmppath('conf/hdfs-site.xml')
    write_config(settings, destination)
Ejemplo n.º 10
0
  def _write_core_site(self):
    """Generate the group-mapping file and write ``conf/core-site.xml``.

    Sets ``self._namenode_port`` (from ``find_unused_port()``) and
    ``self._fs_default_name`` (an ``hdfs://localhost:<port>`` URI).
    Wildcard proxy-user entries are written for ``hue`` and for the user
    reported by ``getpass.getuser()``.
    """
    # Prep user group mapping file
    mapping_file = self._tmppath('ugm.properties')
    self._write_static_group_mapping(mapping_file)

    port = find_unused_port()
    self._namenode_port = port
    self._fs_default_name = 'hdfs://localhost:%s' % (port,)

    entries = [
      ('fs.default.name', self._fs_default_name),
      ('hadoop.security.authorization', 'true'),
      ('hadoop.security.authentication', 'simple'),
      ('hadoop.proxyuser.hue.hosts', '*'),
      ('hadoop.proxyuser.hue.groups', '*'),
      ('hadoop.proxyuser.%s.hosts' % (getpass.getuser(),), '*'),
      ('hadoop.proxyuser.%s.groups' % (getpass.getuser(),), '*'),
      ('hadoop.tmp.dir', self._tmppath('hadoop_tmp_dir')),
    ]
    write_config(dict(entries), self._tmppath('conf/core-site.xml'))
Ejemplo n.º 11
0
    def _write_hdfs_site(self):
        """Write ``conf/hdfs-site.xml`` and remember where it was written.

        Sets ``self._dfs_http_port`` (from ``find_unused_port()``),
        ``self._dfs_http_address`` (``<fqdn>:<port>``) and ``self._hdfs_site``
        (path of the generated file).
        """
        http_port = find_unused_port()
        self._dfs_http_port = http_port
        self._dfs_http_address = "%s:%s" % (self._fqdn, http_port)

        # Datanode data and IPC endpoints use port 0 on this host.
        datanode_address = "%s:0" % self._fqdn
        datanode_ipc_address = "%s:0" % self._fqdn

        settings = {}
        settings["dfs.webhdfs.enabled"] = "true"
        settings["dfs.http.address"] = self._dfs_http_address
        settings["dfs.namenode.safemode.extension"] = 1
        settings["dfs.namenode.safemode.threshold-pct"] = 0
        settings["dfs.datanode.address"] = datanode_address
        # Work around webhdfs redirect bug -- bind to all interfaces
        settings["dfs.datanode.http.address"] = "0.0.0.0:0"
        settings["dfs.datanode.ipc.address"] = datanode_ipc_address
        settings["dfs.replication"] = 1
        settings["dfs.safemode.min.datanodes"] = 1
        settings["dfs.namenode.fs-limits.min-block-size"] = "1000"
        settings["dfs.permissions"] = "true"

        site_path = self._tmppath("conf/hdfs-site.xml")
        self._hdfs_site = site_path
        write_config(settings, site_path)
Ejemplo n.º 12
0
    def _write_hdfs_site(self):
        """Write ``conf/hdfs-site.xml`` and remember where it was written.

        Sets ``self._dfs_http_port`` (from ``find_unused_port()``),
        ``self._dfs_http_address`` (``<fqdn>:<port>``) and ``self._hdfs_site``
        (path of the generated file).
        """
        http_port = find_unused_port()
        self._dfs_http_port = http_port
        self._dfs_http_address = '%s:%s' % (self._fqdn, http_port)

        # Namenode-side settings.
        settings = {
            'dfs.webhdfs.enabled': 'true',
            'dfs.http.address': self._dfs_http_address,
            'dfs.namenode.safemode.extension': 1,
            'dfs.namenode.safemode.threshold-pct': 0,
        }
        # Datanode endpoints: port 0 on this host, except HTTP.
        settings.update({
            'dfs.datanode.address': '%s:0' % self._fqdn,
            # Work around webhdfs redirect bug -- bind to all interfaces
            'dfs.datanode.http.address': '0.0.0.0:0',
            'dfs.datanode.ipc.address': '%s:0' % self._fqdn,
        })
        settings.update({
            'dfs.replication': 1,
            'dfs.safemode.min.datanodes': 1,
            'dfs.namenode.fs-limits.min-block-size': '1000',
        })

        site_path = self._tmppath('conf/hdfs-site.xml')
        self._hdfs_site = site_path
        write_config(settings, site_path)
Ejemplo n.º 13
0
    def _write_core_site(self):
        """Write ``conf/core-site.xml`` and remember where it was written.

        Sets ``self._namenode_port`` (from ``find_unused_port()``),
        ``self._fs_default_name`` (``hdfs://<fqdn>:<port>``) and
        ``self._core_site`` (path of the generated file).
        """
        port = find_unused_port()
        self._namenode_port = port
        self._fs_default_name = "hdfs://%s:%s" % (self._fqdn, port)

        settings = {}
        settings["fs.default.name"] = self._fs_default_name
        settings["hadoop.security.authorization"] = "true"
        settings["hadoop.security.authentication"] = "simple"
        # Wildcard proxy-user entries for hue, oozie and the current OS user.
        settings["hadoop.proxyuser.hue.hosts"] = "*"
        settings["hadoop.proxyuser.hue.groups"] = "*"
        settings["hadoop.proxyuser.oozie.hosts"] = "*"
        settings["hadoop.proxyuser.oozie.groups"] = "*"
        settings["hadoop.proxyuser.%s.hosts" % (getpass.getuser(),)] = "*"
        settings["hadoop.proxyuser.%s.groups" % (getpass.getuser(),)] = "*"
        settings["hadoop.tmp.dir"] = self._tmppath("hadoop_tmp_dir")
        settings["fs.trash.interval"] = 10

        site_path = self._tmppath("conf/core-site.xml")
        self._core_site = site_path
        write_config(settings, site_path)
Ejemplo n.º 14
0
  def _write_core_site(self):
    """Write ``conf/core-site.xml`` and remember where it was written.

    Sets ``self._namenode_port`` (from ``find_unused_port()``),
    ``self._fs_default_name`` (``hdfs://<fqdn>:<port>``) and
    ``self._core_site`` (path of the generated file).
    """
    port = find_unused_port()
    self._namenode_port = port
    self._fs_default_name = 'hdfs://%s:%s' % (self._fqdn, port,)

    entries = [
      ('fs.default.name', self._fs_default_name),
      ('hadoop.security.authorization', 'true'),
      ('hadoop.security.authentication', 'simple'),
      # Wildcard proxy-user entries for hue, oozie and the current OS user.
      ('hadoop.proxyuser.hue.hosts', '*'),
      ('hadoop.proxyuser.hue.groups', '*'),
      ('hadoop.proxyuser.oozie.hosts', '*'),
      ('hadoop.proxyuser.oozie.groups', '*'),
      ('hadoop.proxyuser.%s.hosts' % (getpass.getuser(),), '*'),
      ('hadoop.proxyuser.%s.groups' % (getpass.getuser(),), '*'),
      ('hadoop.tmp.dir', self._tmppath('hadoop_tmp_dir')),
      ('fs.trash.interval', 10),
    ]

    site_path = self._tmppath('conf/core-site.xml')
    self._core_site = site_path
    write_config(dict(entries), site_path)
Ejemplo n.º 15
0
    def _write_core_site(self):
        """Write ``conf/core-site.xml`` and remember where it was written.

        Sets ``self._namenode_port`` (from ``find_unused_port()``),
        ``self._fs_default_name`` (``hdfs://<fqdn>:<port>``) and
        ``self._core_site`` (path of the generated file).
        """
        port = find_unused_port()
        self._namenode_port = port
        self._fs_default_name = 'hdfs://%s:%s' % (self._fqdn, port)

        # Filesystem location and security mode.
        settings = {'fs.default.name': self._fs_default_name}
        settings.update({
            'hadoop.security.authorization': 'true',
            'hadoop.security.authentication': 'simple',
        })
        # Wildcard proxy-user entries for hue, oozie and the current OS user.
        settings.update({
            'hadoop.proxyuser.hue.hosts': '*',
            'hadoop.proxyuser.hue.groups': '*',
            'hadoop.proxyuser.oozie.hosts': '*',
            'hadoop.proxyuser.oozie.groups': '*',
            'hadoop.proxyuser.%s.hosts' % (getpass.getuser(), ): '*',
            'hadoop.proxyuser.%s.groups' % (getpass.getuser(), ): '*',
        })
        settings.update({
            'hadoop.tmp.dir': self._tmppath('hadoop_tmp_dir'),
            'fs.trash.interval': 10,
        })

        site_path = self._tmppath('conf/core-site.xml')
        self._core_site = site_path
        write_config(settings, site_path)
Ejemplo n.º 16
0
  def _write_yarn_site(self):
    """Obtain YARN ports and write ``conf/yarn-site.xml``.

    Ports returned by ``find_unused_port()`` are stored on the instance
    (``_rm_resource_port``, ``_rm_port``, ``_rm_scheduler_port``,
    ``_rm_admin_port``, ``_rm_webapp_port``, ``_nm_port``,
    ``_nm_webapp_port``). The path of the generated file is stored in
    ``self._yarn_site`` and handed to ``write_config``.
    """
    self._rm_resource_port = find_unused_port()
    self._rm_port = find_unused_port()
    self._rm_scheduler_port = find_unused_port()
    self._rm_admin_port = find_unused_port()
    self._rm_webapp_port = find_unused_port()
    self._nm_port = find_unused_port()
    self._nm_webapp_port = find_unused_port()

    def endpoint(port):
      # Every address shares the host part taken from self._fqdn.
      return '%s:%s' % (self._fqdn, port)

    yarn_configs = {
      'yarn.resourcemanager.resource-tracker.address': endpoint(self._rm_resource_port),
      'yarn.resourcemanager.address': endpoint(self._rm_port),
      'yarn.resourcemanager.scheduler.address': endpoint(self._rm_scheduler_port),
      'yarn.resourcemanager.scheduler.class': 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler',
      'yarn.resourcemanager.admin.address': endpoint(self._rm_admin_port),
      'yarn.resourcemanager.webapp.address': endpoint(self._rm_webapp_port),

      'yarn.log-aggregation-enable': 'true',
      'yarn.dispatcher.exit-on-error': 'true',

      'yarn.nodemanager.local-dirs': self._local_dir,
      'yarn.nodemanager.log-dirs': self._logpath('yarn-logs'),
      'yarn.nodemanager.remote-app-log-dir': '/var/log/hadoop-yarn/apps',
      'yarn.nodemanager.localizer.address': endpoint(self._nm_port),
      'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
      # The class key has to embed the aux-service name declared on the line
      # above; the previous "mapreduce.shuffle" spelling did not match it.
      'yarn.nodemanager.aux-services.mapreduce_shuffle.class': 'org.apache.hadoop.mapred.ShuffleHandler',
      # NOTE(review): bare port with no host part, unlike the other address
      # entries -- confirm this is intended.
      'yarn.nodemanager.webapp.address': self._nm_webapp_port,

      'yarn.app.mapreduce.am.staging-dir': '/tmp/hadoop-yarn/staging',

      'yarn.application.classpath':
      '''$HADOOP_CONF_DIR,
        $HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
        $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
        $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,
        $HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*''',
    }

    # Build the path once and pass the stored value on, so the recorded
    # location and the file actually written cannot drift apart.
    self._yarn_site = self._tmppath('conf/yarn-site.xml')
    write_config(yarn_configs, self._yarn_site)
Ejemplo n.º 17
0
    def _write_yarn_site(self):
        """Obtain YARN ports and write ``conf/yarn-site.xml``.

        Ports returned by ``find_unused_port()`` are stored on the instance
        (``_rm_resource_port``, ``_rm_port``, ``_rm_scheduler_port``,
        ``_rm_admin_port``, ``_rm_webapp_port``, ``_nm_port``,
        ``_nm_webapp_port``). The path of the generated file is stored in
        ``self._yarn_site`` and handed to ``write_config``.
        """
        self._rm_resource_port = find_unused_port()
        self._rm_port = find_unused_port()
        self._rm_scheduler_port = find_unused_port()
        self._rm_admin_port = find_unused_port()
        self._rm_webapp_port = find_unused_port()
        self._nm_port = find_unused_port()
        self._nm_webapp_port = find_unused_port()

        def endpoint(port):
            # Every address shares the host part taken from self._fqdn.
            return '%s:%s' % (self._fqdn, port)

        yarn_configs = {
            'yarn.resourcemanager.resource-tracker.address':
            endpoint(self._rm_resource_port),
            'yarn.resourcemanager.address':
            endpoint(self._rm_port),
            # /!\ Hardcoded to 8030 for now: self._rm_scheduler_port is
            # obtained above but is not used for this entry.
            'yarn.resourcemanager.scheduler.address':
            endpoint(8030),
            'yarn.resourcemanager.scheduler.class':
            'org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler',
            'yarn.resourcemanager.admin.address':
            endpoint(self._rm_admin_port),
            'yarn.resourcemanager.webapp.address':
            endpoint(self._rm_webapp_port),
            'yarn.log-aggregation-enable':
            'true',
            'yarn.dispatcher.exit-on-error':
            'true',
            'yarn.nodemanager.local-dirs':
            self._local_dir,
            'yarn.nodemanager.log-dirs':
            self._logpath('yarn-logs'),
            'yarn.nodemanager.remote-app-log-dir':
            '/var/log/hadoop-yarn/apps',
            'yarn.nodemanager.localizer.address':
            endpoint(self._nm_port),
            # NOTE(review): newer Hadoop releases appear to name this service
            # 'mapreduce_shuffle' -- confirm which release this targets.
            'yarn.nodemanager.aux-services':
            'mapreduce.shuffle',
            'yarn.nodemanager.aux-services.mapreduce.shuffle.class':
            'org.apache.hadoop.mapred.ShuffleHandler',
            # NOTE(review): bare port with no host part, unlike the other
            # address entries -- confirm this is intended.
            'yarn.nodemanager.webapp.address':
            self._nm_webapp_port,
            'yarn.app.mapreduce.am.staging-dir':
            '/tmp/hadoop-yarn/staging',
            'yarn.application.classpath':
            '''$HADOOP_CONF_DIR,
        $HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
        $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
        $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,
        $HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*''',
        }

        # Build the path once and pass the stored value on, so the recorded
        # location and the file actually written cannot drift apart.
        self._yarn_site = self._tmppath('conf/yarn-site.xml')
        write_config(yarn_configs, self._yarn_site)