def mapred_site_xml_defaults(workdir, node_info):
    '''
    Build the default entries for the mapred-site.xml config file.

    ``node_info`` is handed to ``memory_defaults``; every memory setting
    below is derived from the resulting ``ram_per_container``. Reduce
    tasks are sized at twice the map value, and the JVM heap (-Xmx) is
    set to 0.8 of the corresponding container size.

    ``workdir`` is accepted but not read here: the staging directory is
    emitted with a literal ``$localworkdir`` prefix (presumably expanded
    elsewhere -- confirm against the caller).

    Returns a dict mapping option names to their default values.
    '''
    container_ram = memory_defaults(node_info).ram_per_container
    heap_fraction = 0.8

    # JVM heap sizes, formatted via format_memory for use in -Xmx.
    map_heap = format_memory(heap_fraction * container_ram, round_val=True)
    reduce_heap = format_memory(heap_fraction * 2 * container_ram, round_val=True)

    # Author's note: in testing, Yarn complained when a job was run with
    # these values set to more than 8g. No cap is applied here.
    map_mb = round_mb(container_ram)
    reduce_mb = round_mb(2 * container_ram)

    # io.sort.mb can't be > 2047mb
    sort_mb = min(int(0.4 * map_mb), 2047)

    return {
        'mapreduce.framework.name': 'yarn',
        'mapreduce.map.java.opts': '-Xmx%s' % map_heap,
        'mapreduce.map.memory.mb': map_mb,
        'mapreduce.reduce.java.opts': '-Xmx%s' % reduce_heap,
        'mapreduce.reduce.memory.mb': reduce_mb,
        'mapreduce.task.io.sort.mb': sort_mb,
        'yarn.app.mapreduce.am.staging-dir': '$localworkdir/tmp/hadoop-yarn/staging',
    }
def test_format_memory(self):
    '''
    Check hcc.format_memory against a table of expected strings.

    Each case is (size, keyword arguments, expected output). A size
    given as a string is first converted with hcc.parse_memory; integer
    sizes are passed through unchanged.
    '''
    parse = hcc.parse_memory
    plain = {}
    rounded = {'round_val': True}

    cases = [
        (1, plain, '1b'),
        (1024, plain, '1k'),
        (2000, plain, '2000b'),
        (1024 * 1024, plain, '1m'),
        (1024 * 1024, rounded, '1m'),
        ('0.5t', plain, '512g'),
        ('0.5t', rounded, '512g'),
        ('8g', plain, '8g'),
        ('9t', plain, '9t'),
        ('7.5m', plain, '7680k'),
        ('7.5m', rounded, '8m'),
        # e.g. from our high memory machines
        (540950507520, rounded, '504g'),
    ]

    for size, options, expected in cases:
        if isinstance(size, str):
            # Parse lazily so each parse happens right before its format call.
            size = parse(size)
        self.assertEqual(hcc.format_memory(size, **options), expected)
def test_format_memory(self):
    '''
    Verify the strings produced by hcc.format_memory.

    Covers raw byte counts and sizes obtained from hcc.parse_memory,
    both with and without round_val=True.
    '''
    to_bytes = hcc.parse_memory

    def check(expected, size, **options):
        # Keep (actual, expected) order so failure messages are unchanged.
        self.assertEqual(hcc.format_memory(size, **options), expected)

    # Plain byte counts.
    check('1b', 1)
    check('1k', 1024)
    check('2000b', 2000)
    check('1m', 1024 * 1024)
    check('1m', 1024 * 1024, round_val=True)

    # Sizes given in human-readable form.
    check('512g', to_bytes('0.5t'))
    check('512g', to_bytes('0.5t'), round_val=True)
    check('8g', to_bytes('8g'))
    check('9t', to_bytes('9t'))
    check('7680k', to_bytes('7.5m'))
    check('8m', to_bytes('7.5m'), round_val=True)

    # e.g. from our high memory machines
    check('504g', 540950507520, round_val=True)