def yarn_site_xml_defaults(workdir, node_info):
    ''' Default entries for the yarn-site.xml config file. '''
    mem_dflts = memory_defaults(node_info)
    ncores = node_info['cores']
    max_alloc = round_mb(mem_dflts.ram_per_container * mem_dflts.num_containers)
    min_alloc = round_mb(mem_dflts.ram_per_container)
    dflts = {
        'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
        'yarn.scheduler.maximum-allocation-mb': max_alloc,
        'yarn.scheduler.minimum-allocation-mb': min_alloc,
        'yarn.nodemanager.resource.memory-mb': max_alloc,
        'yarn.nodemanager.vmem-check-enabled': 'false',
        'yarn.nodemanager.vmem-pmem-ratio': 2.1,
        'yarn.nodemanager.hostname': '$dataname',
        'yarn.nodemanager.webapp.address': '$hostaddress:8042',
        'yarn.resourcemanager.hostname': '$masterdataname',
        'yarn.resourcemanager.webapp.address': '$masterhostaddress:8088',
        'yarn.resourcemanager.webapp.https.address': '$masterhostaddress:8090',
        'yarn.resourcemanager.scheduler.class':
            'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
        'yarn.scheduler.capacity.allocation.file': 'capacity-scheduler.xml',
        'yarn.scheduler.maximum-allocation-vcores': str(ncores),
        'yarn.scheduler.minimum-allocation-vcores': '1',
        'yarn.nodemanager.resource.cpu-vcores': str(ncores),
    }
    return dflts

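# The helpers memory_defaults() and round_mb() are not part of this excerpt.
# Below is a minimal sketch of the shape memory_defaults() must return, based
# purely on the call sites above: the field names (available_memory,
# ram_per_container, num_containers) come from those call sites, but the
# sizing heuristic itself is an assumption, not the project's actual logic
# (the two mapred tests further down suggest the real heuristic also factors
# in the core count).
from collections import namedtuple

MemDefaults = namedtuple('MemDefaults',
                         ['available_memory', 'ram_per_container', 'num_containers'])

def memory_defaults_sketch(node_info):
    ''' Hypothetical: reserve 1/8th of RAM for the OS, split the rest into 2 GiB containers. '''
    total = node_info['memory']['meminfo']['memtotal']
    available = int(total * 0.875)    # assumed OS reservation: 64 GiB node -> 56 GiB, as in the tests below
    per_container = 2 * 1024 ** 3     # assumed 2 GiB per container
    return MemDefaults(available, per_container, available // per_container)
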
def mapred_site_xml_defaults(workdir, node_info):
    ''' Default entries for the mapred-site.xml config file. '''
    mem_dflts = memory_defaults(node_info)
    java_map_mem = format_memory(0.8 * mem_dflts.ram_per_container, round_val=True)
    java_reduce_mem = format_memory(0.8 * 2 * mem_dflts.ram_per_container, round_val=True)
    # In my tests, Yarn gets shirty if I try to run a job and these values are
    # set to more than 8g:
    map_memory = round_mb(mem_dflts.ram_per_container)
    reduce_memory = round_mb(2 * mem_dflts.ram_per_container)
    dflts = {
        'mapreduce.framework.name': 'yarn',
        'mapreduce.map.java.opts': '-Xmx%s' % java_map_mem,
        'mapreduce.map.memory.mb': map_memory,
        'mapreduce.reduce.java.opts': '-Xmx%s' % java_reduce_mem,
        'mapreduce.reduce.memory.mb': reduce_memory,
        # io.sort.mb can't be > 2047mb
        'mapreduce.task.io.sort.mb': min(int(0.4 * map_memory), 2047),
        'yarn.app.mapreduce.am.staging-dir': '$localworkdir/tmp/hadoop-yarn/staging',
    }
    return dflts

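# Worked example, assuming a 2 GiB container (the value the 64 GiB / 24-core
# test node below ends up with); the numbers are just the arithmetic above:
#   map_memory    = round_mb(2 GiB)            -> 2048
#   reduce_memory = round_mb(2 * 2 GiB)        -> 4096
#   io.sort.mb    = min(int(0.4 * 2048), 2047) -> 819
#   mapper -Xmx   = 0.8 * 2 GiB, roughly 1.6 GiB (the exact string depends
#                   on format_memory, which is not shown in this excerpt)
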
def yarn_site_xml_defaults(workdir, node_info): """ Default entries for the yarn-site.xml config file. """ mem_dflts = memory_defaults(node_info) ncores = node_info["cores"] max_alloc = round_mb(mem_dflts.ram_per_container * mem_dflts.num_containers) min_alloc = round_mb(mem_dflts.ram_per_container) dflts = { "yarn.nodemanager.aux-services": "mapreduce_shuffle", "yarn.scheduler.maximum-allocation-mb": max_alloc, "yarn.scheduler.minimum-allocation-mb": min_alloc, "yarn.nodemanager.resource.memory-mb": max_alloc, "yarn.nodemanager.vmem-check-enabled": "false", "yarn.nodemanager.vmem-pmem-ratio": 2.1, "yarn.nodemanager.hostname": "$dataname", "yarn.nodemanager.webapp.address": "$hostaddress:8042", "yarn.resourcemanager.hostname": "$masterdataname", "yarn.resourcemanager.webapp.address": "$masterhostaddress:8088", "yarn.resourcemanager.webapp.https.address": "$masterhostaddress:8090", "yarn.resourcemanager.scheduler.class": "org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler", "yarn.scheduler.capacity.allocation.file": "capacity-scheduler.xml", "yarn.scheduler.maximum-allocation-vcores": str(ncores), "yarn.scheduler.minimum-allocation-vcores": "1", "yarn.nodemanager.resource.cpu-vcores": str(ncores), } return dflts
def test_mapred_site_xml_defaults(self):
    node = dict(fqdn='hosty.domain.be', network='ib0', pid=1234,
                cores=24, totalcores=24, usablecores=range(24), num_nodes=1,
                memory=dict(meminfo=dict(memtotal=68719476736), ulimit='unlimited'))
    d = hca.mapred_site_xml_defaults('/', node)
    self.assertEqual(len(d), 7)
    # Capped at 8g
    self.assertEqual(d['mapreduce.map.memory.mb'], hcc.round_mb(hcc.parse_memory('2G')))
    self.assertEqual(d['mapreduce.reduce.memory.mb'], hcc.round_mb(hcc.parse_memory('4G')))

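# hcc.parse_memory() is not shown in this excerpt. A minimal sketch consistent
# with its use in these tests ('2G' -> a byte count); the exact suffix and
# case handling is an assumption:
def parse_memory_sketch(size):
    ''' Hypothetical: parse strings like '56G' or '512M' into bytes. '''
    units = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3, 'T': 1024 ** 4}
    return int(size[:-1]) * units[size[-1].upper()]
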
def test_mapred_site_xml_defaults(self):
    '''Test mapred defaults; note: only using 4 of the 24 cores.'''
    node = dict(fqdn='hosty.domain.be', network='ib0', pid=1234,
                cores=4, totalcores=24, usablecores=[0, 1, 2, 3], num_nodes=1,
                memory=dict(meminfo=dict(memtotal=68719476736), ulimit='unlimited'))
    d = hca.mapred_site_xml_defaults('/', node)
    self.assertEqual(len(d), 9)
    self.assertEqual(d['hadoop.ln.cmd'], '/bin/ln')
    self.assertEqual(d['lustre.dir'], '$workdir')
    self.assertEqual(d['mapreduce.map.memory.mb'], hcc.round_mb(hcc.parse_memory('1G')))
    self.assertEqual(d['mapreduce.reduce.memory.mb'], hcc.round_mb(hcc.parse_memory('2G')))

import tempfile

def spark_defaults(_, node_info):
    '''
    Generate spark defaults so spark uses all the resources that yarn is able
    to provide.

    Defaults here are based on Cloudera's recommendations here:
    http://blog.cloudera.com/blog/2015/03/how-to-tune-your-apache-spark-jobs-part-2/
    We use 2 cores per executor based on discussion found here:
    http://stackoverflow.com/questions/24622108/apache-spark-the-number-of-cores-vs-the-number-of-executors
    '''
    memory_defaults = hcah.memory_defaults(node_info)
    num_nodes = node_info['num_nodes']
    cores_per_executor = min(2, node_info['cores'])
    # Integer division: the number of executor instances must be whole.
    instances_per_node = node_info['cores'] // cores_per_executor
    # -1 because we want one less executor instance on the application master.
    # If we have only one node then we don't expect the driver to be very busy,
    # so we can give the executors more memory.
    instances = max((num_nodes * instances_per_node) - 1, 1)
    memory = hcac.round_mb(memory_defaults.available_memory / instances_per_node)
    dflts = {
        'spark.executor.cores': cores_per_executor,
        'spark.executor.instances': instances,
        'spark.executor.memory': str(memory) + 'M',
        'spark.local.dir': tempfile.gettempdir(),
    }
    return dflts

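# Usage sketch with a hypothetical 4-node cluster of 24-core machines (the
# arithmetic follows directly from the function above): 2-core executors,
# 24 // 2 = 12 per node, minus one to leave room for the application master:
#   spark_defaults(None, node_info)   # node_info with cores=24, num_nodes=4
#   -> {'spark.executor.cores': 2, 'spark.executor.instances': 47, ...}
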
def test_yarn_site_xml_defaults(self):
    node = dict(fqdn='hosty.domain.be', network='ib0', pid=1234,
                cores=24, totalcores=24, usablecores=range(24), num_nodes=1,
                memory=dict(meminfo=dict(memtotal=68719476736), ulimit='unlimited'))
    d = hca.yarn_site_xml_defaults('/', node)
    self.assertEqual(len(d), 16)
    self.assertEqual(d['yarn.nodemanager.resource.memory-mb'], hcc.round_mb(hcc.parse_memory('56G')))
    self.assertEqual(d['yarn.resourcemanager.webapp.address'], '$masterhostaddress:8088')
    self.assertEqual(d['yarn.resourcemanager.webapp.https.address'], '$masterhostaddress:8090')
    self.assertEqual(d['yarn.nodemanager.hostname'], '$dataname')
    self.assertEqual(d['yarn.nodemanager.webapp.address'], '$hostaddress:8042')
    self.assertEqual(d['yarn.scheduler.minimum-allocation-mb'], hcc.round_mb(hcc.parse_memory('2G')))
    self.assertEqual(d['yarn.scheduler.maximum-allocation-mb'], hcc.round_mb(hcc.parse_memory('56G')))
    self.assertEqual(d['yarn.scheduler.maximum-allocation-vcores'], '24')
    self.assertEqual(d['yarn.scheduler.minimum-allocation-vcores'], '1')
    self.assertEqual(d['yarn.nodemanager.resource.cpu-vcores'], '24')

def test_round_mb(self):
    mb = 1024
    self.assertEqual(hcc.round_mb(512 * (1024 ** 3)), 512 * mb)
    self.assertEqual(hcc.round_mb(64 * (1024 ** 3)), 64 * mb)
    self.assertEqual(hcc.round_mb(32 * (1024 ** 3)), 32 * mb)
    self.assertEqual(hcc.round_mb(32 * (1024 ** 3) + 100 * (1024 ** 2)), 32 * mb)
    self.assertEqual(hcc.round_mb(32 * (1024 ** 3) - 100 * (1024 ** 2)), 32 * mb - 1024)
    self.assertEqual(hcc.round_mb(16 * (1024 ** 3)), 16 * mb)
    self.assertEqual(hcc.round_mb(8 * (1024 ** 3)), 8 * mb)
    self.assertEqual(hcc.round_mb(4 * (1024 ** 3)), 4 * mb)
    self.assertEqual(hcc.round_mb(3 * (1024 ** 3)), 3 * mb)
    self.assertEqual(hcc.round_mb(2 * (1024 ** 3)), 2 * mb)
    self.assertEqual(hcc.round_mb(1 * (1024 ** 3)), 1 * mb)

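# An implementation consistent with every assertion above (a sketch, not
# necessarily the project's actual code): round the byte count down to a
# whole number of GiB and return it expressed in MiB, which is why
# 32 GiB - 100 MiB lands on 31 * 1024.
def round_mb_sketch(nbytes):
    ''' Hypothetical: bytes -> MiB, floored to a whole GiB. '''
    return (nbytes // (1024 ** 3)) * 1024
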