def _write_yarn_site(self):
    """Allocate free ports for the ResourceManager/NodeManager daemons and
    write conf/yarn-site.xml.

    Side effects: sets self._rm_resource_port, self._rm_port,
    self._rm_scheduler_port, self._rm_admin_port, self._rm_webapp_port,
    self._nm_port, self._nm_webapp_port and self._yarn_site (the path of
    the generated file).
    """
    self._rm_resource_port = find_unused_port()
    self._rm_port = find_unused_port()
    self._rm_scheduler_port = find_unused_port()
    self._rm_admin_port = find_unused_port()
    self._rm_webapp_port = find_unused_port()
    self._nm_port = find_unused_port()
    self._nm_webapp_port = find_unused_port()

    yarn_configs = {
        "yarn.resourcemanager.resource-tracker.address": "%s:%s" % (self._fqdn, self._rm_resource_port),
        "yarn.resourcemanager.address": "%s:%s" % (self._fqdn, self._rm_port),
        # /!\ Hardcoded for now (deliberate workaround): should eventually use
        # self._rm_scheduler_port, which is still allocated above.
        "yarn.resourcemanager.scheduler.address": "%s:%s" % (self._fqdn, 8030),
        "yarn.resourcemanager.scheduler.class": "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler",
        "yarn.resourcemanager.admin.address": "%s:%s" % (self._fqdn, self._rm_admin_port),
        "yarn.resourcemanager.webapp.address": "%s:%s" % (self._fqdn, self._rm_webapp_port),
        "yarn.log-aggregation-enable": "true",
        "yarn.dispatcher.exit-on-error": "true",
        "yarn.nodemanager.local-dirs": self._local_dir,
        "yarn.nodemanager.log-dirs": self._logpath("yarn-logs"),
        "yarn.nodemanager.remote-app-log-dir": "/var/log/hadoop-yarn/apps",
        "yarn.nodemanager.localizer.address": "%s:%s" % (self._fqdn, self._nm_port),
        "yarn.nodemanager.aux-services": "mapreduce.shuffle",
        "yarn.nodemanager.aux-services.mapreduce.shuffle.class": "org.apache.hadoop.mapred.ShuffleHandler",
        # Fix: this was the bare port number; every other *.address entry here
        # (and the sibling variant of this writer) uses a host:port pair, which
        # is what YARN expects for an address property.
        "yarn.nodemanager.webapp.address": "%s:%s" % (self._fqdn, self._nm_webapp_port),
        "yarn.app.mapreduce.am.staging-dir": "/tmp/hadoop-yarn/staging",
        "yarn.application.classpath": """$HADOOP_CONF_DIR,
          $HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
          $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
          $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,
          $HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*""",
    }

    self._yarn_site = self._tmppath("conf/yarn-site.xml")
    # Reuse the path stored just above instead of recomputing it.
    write_config(yarn_configs, self._yarn_site)
def _write_oozie_site(cls, cluster):
    """Generate conf/oozie/oozie-site.xml for a Derby-backed test Oozie.

    Points Oozie at the cluster's generated Hadoop conf dir, opens the 'hue'
    proxyuser, and registers the extra workflow schemas / action executors.
    """
    hadoop_conf_dir = cluster._tmppath('conf')

    # Built key by key so the emitted ordering matches insertion order.
    oozie_configs = {}
    oozie_configs['oozie.service.ProxyUserService.proxyuser.hue.hosts'] = '*'
    oozie_configs['oozie.service.ProxyUserService.proxyuser.hue.groups'] = '*'
    oozie_configs['oozie.service.HadoopAccessorService.hadoop.configurations'] = '*=%s' % hadoop_conf_dir
    oozie_configs['oozie.db.schema.name'] = 'oozie'
    oozie_configs['oozie.data.dir'] = cluster._tmppath('oozie_tmp_dir')
    oozie_configs['oozie.service.JPAService.create.db.schema'] = 'false'
    oozie_configs['oozie.service.JPAService.jdbc.driver'] = 'org.apache.derby.jdbc.EmbeddedDriver'
    oozie_configs['oozie.service.JPAService.jdbc.url'] = 'jdbc:derby:${oozie.data.dir}/${oozie.db.schema.name}-db;create=true'
    oozie_configs['oozie.service.JPAService.jdbc.username'] = '******'
    oozie_configs['oozie.service.JPAService.jdbc.password'] = ''
    oozie_configs['oozie.service.SchemaService.wf.ext.schemas'] = '''shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,hive-action-0.2.xsd,
        hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,sqoop-action-0.3.xsd,
        sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd,
        oozie-sla-0.1.xsd,oozie-sla-0.2.xsd, hive2-action-0.1.xsd, spark-action-0.1.xsd'''
    oozie_configs['oozie.service.ActionService.executor.ext.classes'] = '''org.apache.oozie.action.email.EmailActionExecutor,
        org.apache.oozie.action.hadoop.HiveActionExecutor,
        org.apache.oozie.action.hadoop.ShellActionExecutor,
        org.apache.oozie.action.hadoop.SqoopActionExecutor,
        org.apache.oozie.action.hadoop.DistcpActionExecutor,
        org.apache.oozie.action.hadoop.Hive2ActionExecutor,
        org.apache.oozie.action.ssh.SshActionExecutor,
        org.apache.oozie.action.oozie.SubWorkflowActionExecutor,
        org.apache.oozie.action.hadoop.SparkActionExecutor'''
    oozie_configs['oozie.service.coord.normal.default.timeout'] = 120

    write_config(oozie_configs, cluster._tmppath('conf/oozie/oozie-site.xml'))
def _write_mapred_site(self):
    """Pick free JobTracker/TaskTracker ports and write conf/mapred-site.xml
    (MR1 flavour, with the thrift JobTracker plugin).

    Side effects: sets self._jt_thrift_port, self._jt_http_port,
    self._jt_port, self._tt_http_port and self._mapred_site.
    """
    self._jt_thrift_port = find_unused_port()
    self._jt_http_port = find_unused_port()
    self._jt_port = find_unused_port()
    self._tt_http_port = find_unused_port()

    # Precompute the host:port strings once, for readability.
    jt_addr = '%s:%s' % (self._fqdn, self._jt_port)
    jt_http_addr = '%s:%s' % (self._fqdn, self._jt_http_port)
    jt_thrift_addr = '%s:%s' % (self._fqdn, self._jt_thrift_port)
    tt_http_addr = '%s:%s' % (self._fqdn, self._tt_http_port)

    mapred_configs = {
        'mapred.job.tracker': jt_addr,
        'mapred.job.tracker.http.address': jt_http_addr,
        'jobtracker.thrift.address': jt_thrift_addr,
        'mapred.jobtracker.plugins': 'org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin',
        'mapred.task.tracker.http.address': tt_http_addr,
    }

    self._mapred_site = self._tmppath('conf/mapred-site.xml')
    write_config(mapred_configs, self._mapred_site)
def _write_oozie_site(cls, cluster):
    """Emit conf/oozie/oozie-site.xml for the embedded (Derby) Oozie server.

    The config wires Oozie to the generated Hadoop conf dir, whitelists the
    'hue' proxyuser, and declares the extended schemas and action executors.
    """
    # A list of (key, value) pairs keeps the original emission order explicit.
    pairs = [
        ("oozie.service.ProxyUserService.proxyuser.hue.hosts", "*"),
        ("oozie.service.ProxyUserService.proxyuser.hue.groups", "*"),
        ("oozie.service.HadoopAccessorService.hadoop.configurations", "*=%s" % cluster._tmppath("conf")),
        ("oozie.db.schema.name", "oozie"),
        ("oozie.data.dir", cluster._tmppath("oozie_tmp_dir")),
        ("oozie.service.JPAService.create.db.schema", "false"),
        ("oozie.service.JPAService.jdbc.driver", "org.apache.derby.jdbc.EmbeddedDriver"),
        ("oozie.service.JPAService.jdbc.url", "jdbc:derby:${oozie.data.dir}/${oozie.db.schema.name}-db;create=true"),
        ("oozie.service.JPAService.jdbc.username", "******"),
        ("oozie.service.JPAService.jdbc.password", ""),
        ("oozie.service.SchemaService.wf.ext.schemas", """shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,hive-action-0.2.xsd,
            hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,sqoop-action-0.3.xsd,
            sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd,
            oozie-sla-0.1.xsd,oozie-sla-0.2.xsd, hive2-action-0.1.xsd, spark-action-0.1.xsd"""),
        ("oozie.service.ActionService.executor.ext.classes", """org.apache.oozie.action.email.EmailActionExecutor,
            org.apache.oozie.action.hadoop.HiveActionExecutor,
            org.apache.oozie.action.hadoop.ShellActionExecutor,
            org.apache.oozie.action.hadoop.SqoopActionExecutor,
            org.apache.oozie.action.hadoop.DistcpActionExecutor,
            org.apache.oozie.action.hadoop.Hive2ActionExecutor,
            org.apache.oozie.action.ssh.SshActionExecutor,
            org.apache.oozie.action.oozie.SubWorkflowActionExecutor,
            org.apache.oozie.action.hadoop.SparkActionExecutor"""),
        ("oozie.service.coord.normal.default.timeout", 120),
    ]
    oozie_configs = dict(pairs)

    write_config(oozie_configs, cluster._tmppath("conf/oozie/oozie-site.xml"))
def _write_mapred_site(self):
    """Write conf/mapred-site.xml for the YARN framework: JobHistory server
    addresses on fresh ports plus the shuffle port.

    Assumes self._rm_port was already assigned by the yarn-site writer --
    the 'mapred.job.tracker' entry points at the ResourceManager.
    Side effects: sets self._jh_port, self._jh_web_port,
    self._mr_shuffle_port and self._mapred_site.
    """
    self._jh_port = find_unused_port()
    self._jh_web_port = find_unused_port()
    self._mr_shuffle_port = find_unused_port()

    mapred_configs = {
        'mapred.job.tracker': '{0}:{1}'.format(self._fqdn, self._rm_port),
        'mapreduce.framework.name': 'yarn',
        'mapreduce.jobhistory.address': '{0}:{1}'.format(self._fqdn, self._jh_port),
        'mapreduce.jobhistory.webapp.address': '{0}:{1}'.format(self._fqdn, self._jh_web_port),
        'mapreduce.task.tmp.dir': self._tmppath('tasks'),
        'mapreduce.shuffle.port': self._mr_shuffle_port,
    }

    self._mapred_site = self._tmppath('conf/mapred-site.xml')
    write_config(mapred_configs, self._mapred_site)
def _write_mapred_site(self):
    """Allocate MR1 JobTracker/TaskTracker ports and write conf/mapred-site.xml.

    Side effects: sets self._jt_thrift_port, self._jt_http_port,
    self._jt_port and self._tt_http_port.
    """
    self._jt_thrift_port = find_unused_port()
    self._jt_http_port = find_unused_port()
    self._jt_port = find_unused_port()
    self._tt_http_port = find_unused_port()

    def host_port(port):
        # All addresses share this cluster's FQDN.
        return '%s:%s' % (self._fqdn, port)

    mapred_configs = {
        'mapred.job.tracker': host_port(self._jt_port),
        'mapred.job.tracker.http.address': host_port(self._jt_http_port),
        'jobtracker.thrift.address': host_port(self._jt_thrift_port),
        'mapred.jobtracker.plugins': 'org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin',
        'mapred.task.tracker.http.address': host_port(self._tt_http_port),
    }

    write_config(mapred_configs, self._tmppath('conf/mapred-site.xml'))
def _write_mapred_site(self):
    """Produce conf/mapred-site.xml targeting the YARN framework.

    'mapred.job.tracker' reuses self._rm_port (set by the yarn-site writer).
    Side effects: sets self._jh_port, self._jh_web_port,
    self._mr_shuffle_port and self._mapred_site.
    """
    self._jh_port = find_unused_port()
    self._jh_web_port = find_unused_port()
    self._mr_shuffle_port = find_unused_port()

    # Assemble the three host:port strings ahead of the dict literal.
    rm_addr = '%s:%s' % (self._fqdn, self._rm_port)
    jh_addr = '%s:%s' % (self._fqdn, self._jh_port)
    jh_web_addr = '%s:%s' % (self._fqdn, self._jh_web_port)

    mapred_configs = {
        'mapred.job.tracker': rm_addr,
        'mapreduce.framework.name': 'yarn',
        'mapreduce.jobhistory.address': jh_addr,
        'mapreduce.jobhistory.webapp.address': jh_web_addr,
        'mapreduce.task.tmp.dir': self._tmppath('tasks'),
        'mapreduce.shuffle.port': self._mr_shuffle_port,
    }

    self._mapred_site = self._tmppath('conf/mapred-site.xml')
    write_config(mapred_configs, self._mapred_site)
def _write_mapred_site(self):
    """Write conf/mapred-site.xml (YARN flavour) with freshly-allocated
    JobHistory and shuffle ports.

    Reads self._rm_port (must already be set by the yarn-site writer).
    Side effects: sets self._jh_port, self._jh_web_port,
    self._mr_shuffle_port and self._mapred_site.
    """
    self._jh_port = find_unused_port()
    self._jh_web_port = find_unused_port()
    self._mr_shuffle_port = find_unused_port()

    def addr(port):
        return "%s:%s" % (self._fqdn, port)

    mapred_configs = {
        "mapred.job.tracker": addr(self._rm_port),
        "mapreduce.framework.name": "yarn",
        "mapreduce.jobhistory.address": addr(self._jh_port),
        "mapreduce.jobhistory.webapp.address": addr(self._jh_web_port),
        "mapreduce.task.tmp.dir": self._tmppath("tasks"),
        "mapreduce.shuffle.port": self._mr_shuffle_port,
    }

    self._mapred_site = self._tmppath("conf/mapred-site.xml")
    write_config(mapred_configs, self._mapred_site)
def _write_core_site(self):
    """Write conf/core-site.xml: static user/group mapping, proxyuser rules
    for the cluster superuser, and a fresh NameNode port.

    Side effects: writes the ugm.properties mapping file, sets
    self._namenode_port and self._fs_default_name.
    """
    # Prep user group mapping file
    ugm_properties = self._tmppath('ugm.properties')
    self._write_static_group_mapping(ugm_properties)

    self._namenode_port = find_unused_port()
    self._fs_default_name = 'hdfs://localhost:%s' % (self._namenode_port,)

    # Proxyuser keys are parameterized on the configured superuser.
    superuser_groups_key = 'hadoop.proxyuser.%s.groups' % (self.superuser,)
    superuser_hosts_key = 'hadoop.proxyuser.%s.hosts' % (self.superuser,)

    core_configs = {
        'fs.default.name': self._fs_default_name,
        'hadoop.security.authorization': 'true',
        'hadoop.security.authentication': 'simple',
        superuser_groups_key: 'users,supergroup',
        superuser_hosts_key: 'localhost',
        'hadoop.tmp.dir': self._tmppath('hadoop_tmp_dir'),
    }

    write_config(core_configs, self._tmppath('conf/core-site.xml'))
def _write_hdfs_site(self):
    """Write conf/hdfs-site.xml for a single-datanode localhost cluster with
    webhdfs enabled and safemode effectively disabled.

    Side effects: sets self._dfs_http_port and self._dfs_http_address.
    """
    self._dfs_http_port = find_unused_port()
    self._dfs_http_address = 'localhost:%s' % (self._dfs_http_port,)

    hdfs_configs = {}
    hdfs_configs['dfs.webhdfs.enabled'] = 'true'
    hdfs_configs['dfs.http.address'] = self._dfs_http_address
    # Leave safemode almost immediately: no extension, zero threshold.
    hdfs_configs['dfs.namenode.safemode.extension'] = 1
    hdfs_configs['dfs.namenode.safemode.threshold-pct'] = 0
    hdfs_configs['dfs.datanode.address'] = 'localhost:0'
    # Work around webhdfs redirect bug -- bind to all interfaces
    hdfs_configs['dfs.datanode.http.address'] = '0.0.0.0:0'
    hdfs_configs['dfs.datanode.ipc.address'] = 'localhost:0'
    hdfs_configs['dfs.replication'] = 1
    hdfs_configs['dfs.safemode.min.datanodes'] = 1

    write_config(hdfs_configs, self._tmppath('conf/hdfs-site.xml'))
def _write_hdfs_site(self):
    """Write conf/hdfs-site.xml (FQDN-addressed variant): webhdfs on, one
    replica, safemode effectively disabled.

    Side effects: sets self._dfs_http_port and self._dfs_http_address.
    """
    self._dfs_http_port = find_unused_port()
    self._dfs_http_address = '%s:%s' % (self._fqdn, self._dfs_http_port)

    # Port 0 lets the datanode pick any free port on the cluster host.
    dn_any_port = '%s:0' % self._fqdn

    hdfs_configs = {
        'dfs.webhdfs.enabled': 'true',
        'dfs.http.address': self._dfs_http_address,
        'dfs.namenode.safemode.extension': 1,
        'dfs.namenode.safemode.threshold-pct': 0,
        'dfs.datanode.address': dn_any_port,
        # Work around webhdfs redirect bug -- bind to all interfaces
        'dfs.datanode.http.address': '0.0.0.0:0',
        'dfs.datanode.ipc.address': dn_any_port,
        'dfs.replication': 1,
        'dfs.safemode.min.datanodes': 1,
    }

    write_config(hdfs_configs, self._tmppath('conf/hdfs-site.xml'))
def _write_core_site(self):
    """Write conf/core-site.xml: static group mapping, open proxyuser rules
    for 'hue' and the current OS user, and a fresh NameNode port.

    Side effects: writes ugm.properties, sets self._namenode_port and
    self._fs_default_name.
    """
    # Prep user group mapping file
    ugm_properties = self._tmppath('ugm.properties')
    self._write_static_group_mapping(ugm_properties)

    self._namenode_port = find_unused_port()
    self._fs_default_name = 'hdfs://localhost:%s' % (self._namenode_port,)

    current_user = getpass.getuser()

    core_configs = {
        'fs.default.name': self._fs_default_name,
        'hadoop.security.authorization': 'true',
        'hadoop.security.authentication': 'simple',
        'hadoop.proxyuser.hue.hosts': '*',
        'hadoop.proxyuser.hue.groups': '*',
        'hadoop.proxyuser.%s.hosts' % (current_user,): '*',
        'hadoop.proxyuser.%s.groups' % (current_user,): '*',
        'hadoop.tmp.dir': self._tmppath('hadoop_tmp_dir'),
    }

    write_config(core_configs, self._tmppath('conf/core-site.xml'))
def _write_core_site(self):
    """Write conf/core-site.xml with proxyuser grants for hue, oozie and the
    current OS user, plus trash enabled.

    Side effects: sets self._namenode_port, self._fs_default_name and
    self._core_site (the generated file's path).
    """
    self._namenode_port = find_unused_port()
    self._fs_default_name = 'hdfs://%s:%s' % (self._fqdn, self._namenode_port,)

    me = getpass.getuser()

    core_configs = {}
    core_configs['fs.default.name'] = self._fs_default_name
    core_configs['hadoop.security.authorization'] = 'true'
    core_configs['hadoop.security.authentication'] = 'simple'
    # Unrestricted impersonation for the service users and the test runner.
    core_configs['hadoop.proxyuser.hue.hosts'] = '*'
    core_configs['hadoop.proxyuser.hue.groups'] = '*'
    core_configs['hadoop.proxyuser.oozie.hosts'] = '*'
    core_configs['hadoop.proxyuser.oozie.groups'] = '*'
    core_configs['hadoop.proxyuser.%s.hosts' % (me,)] = '*'
    core_configs['hadoop.proxyuser.%s.groups' % (me,)] = '*'
    core_configs['hadoop.tmp.dir'] = self._tmppath('hadoop_tmp_dir')
    core_configs['fs.trash.interval'] = 10

    self._core_site = self._tmppath('conf/core-site.xml')
    write_config(core_configs, self._core_site)
def _write_hdfs_site(self):
    """Write conf/hdfs-site.xml: webhdfs enabled, permissions on, tiny
    min-block-size for tests, safemode effectively disabled.

    Side effects: sets self._dfs_http_port, self._dfs_http_address and
    self._hdfs_site.
    """
    self._dfs_http_port = find_unused_port()
    self._dfs_http_address = '%s:%s' % (self._fqdn, self._dfs_http_port)

    def ephemeral(host):
        # host:0 -> the daemon binds any free port on that host.
        return '%s:0' % host

    hdfs_configs = {
        'dfs.webhdfs.enabled': 'true',
        'dfs.http.address': self._dfs_http_address,
        'dfs.namenode.safemode.extension': 1,
        'dfs.namenode.safemode.threshold-pct': 0,
        'dfs.datanode.address': ephemeral(self._fqdn),
        'dfs.datanode.http.address': '0.0.0.0:0',  # Work around webhdfs redirect bug -- bind to all interfaces
        'dfs.datanode.ipc.address': ephemeral(self._fqdn),
        'dfs.replication': 1,
        'dfs.safemode.min.datanodes': 1,
        'dfs.namenode.fs-limits.min-block-size': '1000',
        'dfs.permissions': 'true'
    }

    self._hdfs_site = self._tmppath('conf/hdfs-site.xml')
    write_config(hdfs_configs, self._hdfs_site)
def _write_hdfs_site(self):
    """Emit conf/hdfs-site.xml for the test cluster: webhdfs on, permissions
    enforced, single replica, near-instant safemode exit.

    Side effects: sets self._dfs_http_port, self._dfs_http_address and
    self._hdfs_site.
    """
    self._dfs_http_port = find_unused_port()
    self._dfs_http_address = "%s:%s" % (self._fqdn, self._dfs_http_port)

    # Ephemeral (OS-chosen) datanode ports, pinned to the cluster FQDN.
    dn_addr = "%s:0" % self._fqdn

    hdfs_configs = {
        "dfs.webhdfs.enabled": "true",
        "dfs.http.address": self._dfs_http_address,
        "dfs.namenode.safemode.extension": 1,
        "dfs.namenode.safemode.threshold-pct": 0,
        "dfs.datanode.address": dn_addr,
        # Work around webhdfs redirect bug -- bind to all interfaces
        "dfs.datanode.http.address": "0.0.0.0:0",
        "dfs.datanode.ipc.address": dn_addr,
        "dfs.replication": 1,
        "dfs.safemode.min.datanodes": 1,
        "dfs.namenode.fs-limits.min-block-size": "1000",
        "dfs.permissions": "true",
    }

    self._hdfs_site = self._tmppath("conf/hdfs-site.xml")
    write_config(hdfs_configs, self._hdfs_site)
def _write_core_site(self):
    """Write conf/core-site.xml (FQDN-addressed) with wide-open proxyuser
    grants for hue, oozie and the current OS user, and a 10-minute trash
    interval.

    Side effects: sets self._namenode_port, self._fs_default_name and
    self._core_site.
    """
    self._namenode_port = find_unused_port()
    self._fs_default_name = "hdfs://%s:%s" % (self._fqdn, self._namenode_port)

    user = getpass.getuser()

    pairs = [
        ("fs.default.name", self._fs_default_name),
        ("hadoop.security.authorization", "true"),
        ("hadoop.security.authentication", "simple"),
        ("hadoop.proxyuser.hue.hosts", "*"),
        ("hadoop.proxyuser.hue.groups", "*"),
        ("hadoop.proxyuser.oozie.hosts", "*"),
        ("hadoop.proxyuser.oozie.groups", "*"),
        ("hadoop.proxyuser.%s.hosts" % (user,), "*"),
        ("hadoop.proxyuser.%s.groups" % (user,), "*"),
        ("hadoop.tmp.dir", self._tmppath("hadoop_tmp_dir")),
        ("fs.trash.interval", 10),
    ]
    core_configs = dict(pairs)

    self._core_site = self._tmppath("conf/core-site.xml")
    write_config(core_configs, self._core_site)
def _write_yarn_site(self):
    """Allocate ResourceManager/NodeManager ports and write conf/yarn-site.xml
    (mapreduce_shuffle aux-service variant, all addresses on real ports).

    Side effects: sets self._rm_resource_port, self._rm_port,
    self._rm_scheduler_port, self._rm_admin_port, self._rm_webapp_port,
    self._nm_port, self._nm_webapp_port and self._yarn_site.
    """
    self._rm_resource_port = find_unused_port()
    self._rm_port = find_unused_port()
    self._rm_scheduler_port = find_unused_port()
    self._rm_admin_port = find_unused_port()
    self._rm_webapp_port = find_unused_port()
    self._nm_port = find_unused_port()
    self._nm_webapp_port = find_unused_port()

    def addr(port):
        return '%s:%s' % (self._fqdn, port)

    yarn_configs = {
        'yarn.resourcemanager.resource-tracker.address': addr(self._rm_resource_port),
        'yarn.resourcemanager.address': addr(self._rm_port),
        'yarn.resourcemanager.scheduler.address': addr(self._rm_scheduler_port),
        'yarn.resourcemanager.scheduler.class': 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler',
        'yarn.resourcemanager.admin.address': addr(self._rm_admin_port),
        'yarn.resourcemanager.webapp.address': addr(self._rm_webapp_port),
        'yarn.log-aggregation-enable': 'true',
        'yarn.dispatcher.exit-on-error': 'true',
        'yarn.nodemanager.local-dirs': self._local_dir,
        'yarn.nodemanager.log-dirs': self._logpath('yarn-logs'),
        'yarn.nodemanager.remote-app-log-dir': '/var/log/hadoop-yarn/apps',
        'yarn.nodemanager.localizer.address': addr(self._nm_port),
        'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
        'yarn.nodemanager.aux-services.mapreduce.shuffle.class': 'org.apache.hadoop.mapred.ShuffleHandler',
        'yarn.nodemanager.webapp.address': addr(self._nm_webapp_port),
        'yarn.app.mapreduce.am.staging-dir': '/tmp/hadoop-yarn/staging',
        'yarn.application.classpath': '''$HADOOP_CONF_DIR,
          $HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
          $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
          $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,
          $HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*''',
    }

    self._yarn_site = self._tmppath('conf/yarn-site.xml')
    write_config(yarn_configs, self._yarn_site)