def make_tmp_dir_and_mrjob_conf(self):
    """Create a scratch directory holding a minimal EMR ``mrjob.conf``.

    Sets ``self.tmp_dir`` to a fresh temporary directory and
    ``self.mrjob_conf_path`` to the ``mrjob.conf`` file written inside it.
    The file configures only the ``emr`` runner, with very small values
    for ``check_emr_status_every`` and ``s3_sync_wait_time``.
    """
    self.tmp_dir = tempfile.mkdtemp()
    self.mrjob_conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    emr_opts = {
        'check_emr_status_every': 0.01,
        's3_sync_wait_time': 0.01,
    }

    # use a context manager so the conf is flushed and the handle closed
    # before anything tries to read the file
    with open(self.mrjob_conf_path, 'w') as f:
        dump_mrjob_conf({'runners': {'emr': emr_opts}}, f)
def test_round_trip(self):
    # A conf written with dump_mrjob_conf() should come back unchanged
    # from load_mrjob_conf().
    conf = {'runners': {'foo': {'qux': 'quux'}}}
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    # close the file before loading it, so the contents are guaranteed
    # to have been flushed to disk
    with open(conf_path, 'w') as f:
        dump_mrjob_conf(conf, f)

    with no_handlers_for_logger('mrjob.conf'):
        self.assertEqual(conf, load_mrjob_conf(conf_path=conf_path))
def test_include_relative_to_real_path(self):
    # Loads a conf with a relative include twice: once directly, and
    # once through a symlink living in a different directory.
    conf_dir = os.path.join(self.tmp_dir, 'conf')
    os.mkdir(conf_dir)

    base_path = os.path.join(conf_dir, 'mrjob.base.conf')
    real_base_path = os.path.realpath(base_path)
    target_path = os.path.join(conf_dir, 'mrjob.conf')
    link_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    files_to_write = (
        (base_path, {}),
        (target_path, {'include': 'mrjob.base.conf'}),
    )
    for path, contents in files_to_write:
        with open(path, 'w') as conf_file:
            dump_mrjob_conf(contents, conf_file)

    os.symlink(os.path.join('conf', 'mrjob.conf'), link_path)

    loaded_directly = load_opts_from_mrjob_conf('foo', target_path)
    self.assertEqual(
        loaded_directly,
        [(real_base_path, {}), (target_path, {})])

    # relative include should work from the symlink even though
    # it's not in the same directory as mrjob.base.conf
    loaded_via_link = load_opts_from_mrjob_conf('foo', link_path)
    self.assertEqual(
        loaded_via_link,
        [(real_base_path, {}), (link_path, {})])
def test_can_turn_off_bootstrap_mrjob(self):
    # With bootstrap_mrjob set to False for the local runner, the job
    # script should report loading mrjob from the same directory this
    # test loads it from.

    # track the dir we're loading mrjob from rather than the full path
    # to deal with edge cases where we load from the .py file,
    # and the script loads from the .pyc compiled from that .py file.
    our_mrjob_dir = os.path.dirname(os.path.realpath(mrjob.__file__))

    self.mrjob_conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    # write through a context manager so the conf is closed (and
    # flushed) before the runner reads it
    with open(self.mrjob_conf_path, 'w') as f:
        dump_mrjob_conf(
            {'runners': {'local': {'bootstrap_mrjob': False}}}, f)

    mr_job = MRJobWhereAreYou(['-c', self.mrjob_conf_path, '-r', 'local'])
    mr_job.sandbox()

    with mr_job.make_runner() as runner:
        # sanity check
        self.assertEqual(runner.get_opts()['bootstrap_mrjob'], False)
        runner.run()
        output = list(runner.stream_output())

        self.assertEqual(len(output), 1)

        # script should load mrjob from the same place our test does
        _, script_mrjob_dir = mr_job.parse_output_line(output[0])
        self.assertEqual(our_mrjob_dir, script_mrjob_dir)
def test_can_turn_off_bootstrap_mrjob(self):
    # With bootstrap_mrjob set to False in the conf, the job script
    # should report loading mrjob from the same directory this test
    # loads it from.
    # NOTE(review): no '-r' switch is passed, so this relies on the job's
    # default runner reading the 'local' section -- confirm.

    # track the dir we're loading mrjob from rather than the full path
    # to deal with edge cases where we load from the .py file,
    # and the script loads from the .pyc compiled from that .py file.
    our_mrjob_dir = os.path.dirname(os.path.realpath(mrjob.__file__))

    self.mrjob_conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    # write through a context manager so the conf is closed (and
    # flushed) before the runner reads it
    with open(self.mrjob_conf_path, 'w') as f:
        dump_mrjob_conf(
            {'runners': {'local': {'bootstrap_mrjob': False}}}, f)

    mr_job = MRJobWhereAreYou(['-c', self.mrjob_conf_path])
    mr_job.sandbox()

    with mr_job.make_runner() as runner:
        # sanity check
        self.assertEqual(runner.get_opts()['bootstrap_mrjob'], False)
        runner.run()
        output = list(runner.stream_output())

        self.assertEqual(len(output), 1)

        # script should load mrjob from the same place our test does
        _, script_mrjob_dir = mr_job.parse_output_line(output[0])
        self.assertEqual(our_mrjob_dir, script_mrjob_dir)
def _test_round_trip(self, conf):
    """Dump *conf* to disk and assert that loading it gives it back.

    The conf is written to ``mrjob.conf`` inside ``self.tmp_dir``; the
    load and comparison run inside
    ``no_handlers_for_logger('mrjob.conf')``.
    """
    path = os.path.join(self.tmp_dir, 'mrjob.conf')

    with open(path, 'w') as conf_file:
        dump_mrjob_conf(conf, conf_file)

    with no_handlers_for_logger('mrjob.conf'):
        reloaded = load_mrjob_conf(conf_path=path)
        self.assertEqual(conf, reloaded)
def test_round_trip(self):
    # A conf written with dump_mrjob_conf() should come back unchanged
    # from load_mrjob_conf().
    conf = {"runners": {"foo": {"qux": "quux"}}}
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf")

    # close the file before loading it, so the contents are guaranteed
    # to have been flushed to disk
    with open(conf_path, "w") as f:
        dump_mrjob_conf(conf, f)

    with no_handlers_for_logger("mrjob.conf"):
        self.assertEqual(conf, load_mrjob_conf(conf_path=conf_path))
def test_duplicate_conf_path(self):
    # Passing the same conf path twice should yield a single entry.
    path = os.path.join(self.tmp_dir, "mrjob.conf")

    with open(path, "w") as conf_file:
        dump_mrjob_conf({}, conf_file)

    loaded = load_opts_from_mrjob_confs("foo", [path] * 2)
    self.assertEqual(loaded, [(path, {})])
def make_mrjob_conf(self):
    """Write a minimal EMR conf to a temp file; store its path on ``self``.

    Sets ``self.mrjob_conf_path`` to a file created by
    ``tempfile.mkstemp()``. The conf configures only the ``emr`` runner.
    """
    # local import: only needed to wrap the descriptor from mkstemp()
    import os

    fd, self.mrjob_conf_path = tempfile.mkstemp(prefix='mrjob.conf.')

    emr_opts = {
        'check_emr_status_every': 0.01,
        's3_scratch_uri': 's3://walrus/tmp',
        's3_sync_wait_time': 0.01,
    }

    # write through the descriptor mkstemp() already opened, so neither
    # it nor a second file handle is leaked
    with os.fdopen(fd, 'w') as f:
        dump_mrjob_conf({'runners': {'emr': emr_opts}}, f)
def _test_round_trip(self, conf):
    """Dump *conf* to disk and assert that loading it gives it back.

    The conf is written to ``mrjob.conf`` inside ``self.tmp_dir``.
    """
    dumped_to = os.path.join(self.tmp_dir, 'mrjob.conf')

    with open(dumped_to, 'w') as out:
        dump_mrjob_conf(conf, out)

    loaded = load_mrjob_conf(conf_path=dumped_to)
    self.assertEqual(conf, loaded)
def _test_round_trip(self, conf):
    """Check that *conf* survives a dump-then-load cycle unchanged.

    The conf is written to ``mrjob.conf`` inside ``self.tmp_dir``; the
    load and comparison run inside
    ``no_handlers_for_logger("mrjob.conf")``.
    """
    target = os.path.join(self.tmp_dir, "mrjob.conf")

    with open(target, "w") as handle:
        dump_mrjob_conf(conf, handle)

    with no_handlers_for_logger("mrjob.conf"):
        round_tripped = load_mrjob_conf(conf_path=target)
        self.assertEqual(conf, round_tripped)
def test_recursive_include(self):
    # A conf that includes itself should be reported exactly once.
    self_including_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    with open(self_including_path, 'w') as conf_file:
        dump_mrjob_conf({'include': self_including_path}, conf_file)

    loaded = load_opts_from_mrjob_conf('foo', self_including_path)
    self.assertEqual(loaded, [(self_including_path, {})])
def test_recursive_include(self):
    # Loading a conf whose "include" points back at itself should
    # produce one entry for that file.
    path = os.path.join(self.tmp_dir, 'mrjob.conf')
    contents = {'include': path}

    with open(path, 'w') as out:
        dump_mrjob_conf(contents, out)

    expected = [(path, {})]
    self.assertEqual(load_opts_from_mrjob_conf('foo', path), expected)
def test_using_json_and_not_yaml(self):
    # The dumped file, with spaces and newlines stripped, should be
    # exactly the compact JSON encoding of the conf.
    conf = {'runners': {'foo': {'qux': 'quux'}}}
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    # close the file after writing so the data is flushed before we
    # read it back
    with open(conf_path, 'w') as f:
        dump_mrjob_conf(conf, f)

    with open(conf_path) as f:
        contents = f.read()

    assert_equal(contents.replace(' ', '').replace('\n', ''),
                 '{"runners":{"foo":{"qux":"quux"}}}')
def make_tmp_dir_and_mrjob_conf(self):
    """Create a scratch directory holding an EMR ``mrjob.conf``.

    Sets ``self.tmp_dir`` to a fresh temporary directory and
    ``self.mrjob_conf_path`` to the ``mrjob.conf`` file written inside it.
    The file configures only the ``emr`` runner.
    """
    self.tmp_dir = tempfile.mkdtemp()
    self.mrjob_conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    emr_opts = {
        'check_emr_status_every': 0.01,
        's3_sync_wait_time': 0.01,
        'aws_availability_zone': 'PUPPYLAND',
        'additional_emr_info': {'key': 'value'},
    }

    # use a context manager so the conf is flushed and the handle closed
    # before anything tries to read the file
    with open(self.mrjob_conf_path, 'w') as f:
        dump_mrjob_conf({'runners': {'emr': emr_opts}}, f)
def test_using_json_and_not_yaml(self):
    # The dumped file, with spaces and newlines stripped, should be
    # exactly the compact JSON encoding of the conf.
    conf = {"runners": {"foo": {"qux": "quux"}}}
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf")

    # close the file after writing so the data is flushed before we
    # read it back
    with open(conf_path, "w") as f:
        dump_mrjob_conf(conf, f)

    with open(conf_path) as f:
        contents = f.read()

    self.assertEqual(contents.replace(" ", "").replace("\n", ""),
                     '{"runners":{"foo":{"qux":"quux"}}}')
def test_duplicate_conf_path(self):
    # Listing one conf path twice should still give a single result.
    only_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    with open(only_path, 'w') as out:
        dump_mrjob_conf({}, out)

    requested = [only_path, only_path]
    expected = [(only_path, {})]
    self.assertEqual(
        load_opts_from_mrjob_confs('foo', requested), expected)
def test_using_json_and_not_yaml(self):
    # The dumped file, with spaces and newlines stripped, should be
    # exactly the compact JSON encoding of the conf.
    conf = {'runners': {'foo': {'qux': 'quux'}}}
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    # close the file after writing so the data is flushed before we
    # read it back
    with open(conf_path, 'w') as f:
        dump_mrjob_conf(conf, f)

    with open(conf_path) as f:
        contents = f.read()

    self.assertEqual(contents.replace(' ', '').replace('\n', ''),
                     '{"runners":{"foo":{"qux":"quux"}}}')
def test_doubly_recursive_include(self):
    # Two confs that include each other: loading the first should list
    # the second, then the first, each once.
    first = os.path.join(self.tmp_dir, "mrjob.1.conf")
    second = os.path.join(self.tmp_dir, "mrjob.2.conf")

    for path, included in ((first, second), (second, first)):
        with open(path, "w") as conf_file:
            dump_mrjob_conf({"include": included}, conf_file)

    loaded = load_opts_from_mrjob_conf("foo", first)
    self.assertEqual(loaded, [(second, {}), (first, {})])
def test_doubly_recursive_include(self):
    # Confs 1 and 2 include one another; loading conf 1 is expected to
    # return conf 2 followed by conf 1.
    path_a = os.path.join(self.tmp_dir, 'mrjob.1.conf')
    path_b = os.path.join(self.tmp_dir, 'mrjob.2.conf')

    with open(path_a, 'w') as file_a:
        dump_mrjob_conf({'include': path_b}, file_a)
    with open(path_b, 'w') as file_b:
        dump_mrjob_conf({'include': path_a}, file_b)

    expected = [(path_b, {}), (path_a, {})]
    self.assertEqual(load_opts_from_mrjob_conf('foo', path_a), expected)
def test_recurse(self):
    # Building a RunnerOptionStore from a conf that includes itself
    # should log a "tries to recursively include" message on the
    # 'mrjob.conf' logger.
    path = os.path.join(self.tmp_dir, 'LOL.conf')

    with open(path, 'w') as conf_file:
        dump_mrjob_conf({'include': path}, conf_file)

    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.conf', log_output)
        RunnerOptionStore('inline', {}, [path])

        expected_message = (
            '%s tries to recursively include %s!' % (path, path))
        self.assertIn(expected_message, log_output.getvalue())
def test_recurse(self):
    # Constructing an InlineMRJobRunner from a conf that includes itself
    # should log a "tries to recursively include" message on the
    # 'mrjob.conf' logger.
    path = os.path.join(self.tmp_dir, 'LOL.conf')

    with open(path, 'w') as conf_file:
        dump_mrjob_conf({'include': path}, conf_file)

    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.conf', captured)
        InlineMRJobRunner(conf_path=path)

        expected_message = (
            '%s tries to recursively include %s!' % (path, path))
        self.assertIn(expected_message, captured.getvalue())
def test_doubly_recursive_include(self):
    # Two mutually-including confs; loading the first should give the
    # second followed by the first.
    paths = [
        os.path.join(self.tmp_dir, name)
        for name in ('mrjob.1.conf', 'mrjob.2.conf')
    ]
    one, two = paths

    # each file includes the other one
    for path, other in zip(paths, reversed(paths)):
        with open(path, 'w') as out:
            dump_mrjob_conf({'include': other}, out)

    loaded = load_opts_from_mrjob_conf('foo', one)
    self.assertEqual(loaded, [(two, {}), (one, {})])
def test_relative_include(self):
    # An include given as a bare file name should load the sibling base
    # conf, which is reported by its real path.
    base_path = os.path.join(self.tmp_dir, "mrjob.base.conf")
    real_base_path = os.path.realpath(base_path)
    main_path = os.path.join(self.tmp_dir, "mrjob.conf")

    with open(base_path, "w") as base_file:
        dump_mrjob_conf({}, base_file)
    with open(main_path, "w") as main_file:
        dump_mrjob_conf({"include": "mrjob.base.conf"}, main_file)

    expected = [(real_base_path, {}), (main_path, {})]
    self.assertEqual(load_opts_from_mrjob_conf("foo", main_path), expected)
def test_relative_include(self):
    # A conf that includes "mrjob.base.conf" by relative name should
    # load that file (reported by real path) ahead of itself.
    included = os.path.join(self.tmp_dir, 'mrjob.base.conf')
    included_real = os.path.realpath(included)
    including = os.path.join(self.tmp_dir, 'mrjob.conf')

    files_to_write = (
        (included, {}),
        (including, {'include': 'mrjob.base.conf'}),
    )
    for path, contents in files_to_write:
        with open(path, 'w') as conf_file:
            dump_mrjob_conf(contents, conf_file)

    loaded = load_opts_from_mrjob_conf('foo', including)
    self.assertEqual(loaded, [(included_real, {}), (including, {})])
def test_conf_path_order_beats_include(self):
    # When both confs are passed explicitly, the order they are passed in
    # should decide the result order, even though conf 1 also includes
    # conf 2.
    conf_path_1 = os.path.join(self.tmp_dir, 'mrjob.1.conf')
    conf_path_2 = os.path.join(self.tmp_dir, 'mrjob.2.conf')

    # conf 1 must actually include conf 2; with two empty confs this
    # test would not exercise the include at all
    with open(conf_path_1, 'w') as f:
        dump_mrjob_conf({'include': conf_path_2}, f)

    with open(conf_path_2, 'w') as f:
        dump_mrjob_conf({}, f)

    # shouldn't matter that conf_path_1 includes conf_path_2
    self.assertEqual(
        load_opts_from_mrjob_confs('foo', [conf_path_1, conf_path_2]),
        [(conf_path_1, {}), (conf_path_2, {})])
def test_nested_include(self):
    # mrjob.conf includes conf 1, which in turn includes confs 2 and 3;
    # the expected load order is 2, 3, 1, then mrjob.conf.
    top, inc_1, inc_2, inc_3 = [
        os.path.join(self.tmp_dir, name)
        for name in ('mrjob.conf', 'mrjob.1.conf',
                     'mrjob.2.conf', 'mrjob.3.conf')
    ]

    # accidentally reversed the order of nested includes when
    # trying to make precedence work; this test would catch that
    files_to_write = [
        (top, {'include': inc_1}),
        (inc_1, {'include': [inc_2, inc_3]}),
        (inc_2, {}),
        (inc_3, {}),
    ]
    for path, contents in files_to_write:
        with open(path, 'w') as conf_file:
            dump_mrjob_conf(contents, conf_file)

    loaded = load_opts_from_mrjob_conf('foo', top)
    self.assertEqual(
        loaded,
        [(inc_2, {}), (inc_3, {}), (inc_1, {}), (top, {})])
def test_relative_include(self):
    # Including "mrjob.base.conf" by bare name should pull in the file
    # next to the including conf; it is reported by its real path.
    tmp_dir = self.tmp_dir
    base = os.path.join(tmp_dir, 'mrjob.base.conf')
    base_real = os.path.realpath(base)
    main = os.path.join(tmp_dir, 'mrjob.conf')

    with open(base, 'w') as out:
        dump_mrjob_conf({}, out)

    with open(main, 'w') as out:
        dump_mrjob_conf({'include': 'mrjob.base.conf'}, out)

    actual = load_opts_from_mrjob_conf('foo', main)
    self.assertEqual(actual, [(base_real, {}), (main, {})])
def test_multiple_config_files(self):
    # Two conf files passed with separate -c switches should both
    # contribute their jobconf entries to the inline runner's options.
    tmp_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, tmp_dir)

    conf_paths = []
    for file_name, jobconf in (('left.yaml', {'x': 1}),
                               ('right.yaml', {'y': 2})):
        path = os.path.join(tmp_dir, file_name)
        with open(path, 'w') as conf_file:
            dump_mrjob_conf(
                {'runners': {'inline': {'jobconf': jobconf}}}, conf_file)
        conf_paths.append(path)

    args = ['-r', 'inline']
    for path in conf_paths:
        args.extend(['-c', path])

    mr_job = MRCustomBoringJob(args=args)

    with mr_job.make_runner() as runner:
        self.assertEqual(runner._opts['jobconf']['x'], 1)
        self.assertEqual(runner._opts['jobconf']['y'], 2)
def test_tilde_in_include(self):
    # regression test for #1308
    #
    # Points HOME at the scratch dir, then checks that an include of
    # '~/mrjob.base.conf' loads the base conf from there.
    old_home = os.environ.get('HOME')
    os.environ['HOME'] = self.tmp_dir

    try:
        base_conf_path = os.path.join(self.tmp_dir, 'mrjob.base.conf')
        conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

        with open(base_conf_path, 'w') as f:
            dump_mrjob_conf({}, f)

        with open(conf_path, 'w') as f:
            dump_mrjob_conf({'include': '~/mrjob.base.conf'}, f)

        self.assertEqual(load_opts_from_mrjob_conf('foo', conf_path),
                         [(base_conf_path, {}), (conf_path, {})])
    finally:
        # put HOME back so the override can't leak into other tests
        if old_home is None:
            os.environ.pop('HOME', None)
        else:
            os.environ['HOME'] = old_home
def make_mrjob_conf(self):
    """Write a conf for the ``local`` runner to a fresh temp file.

    Sets ``self.mrjob_conf_path`` to the file created by
    ``tempfile.mkstemp()`` and ``self.mrjob_conf`` to whatever
    ``dump_mrjob_conf()`` returns.
    """
    # local import: only needed to wrap the descriptor from mkstemp()
    import os

    fd, self.mrjob_conf_path = tempfile.mkstemp(prefix='mrjob.conf.')

    # include one fake kwarg, and one real one
    conf = {'runners': {'local': {'qux': 'quux',
                                  'setup_cmds': ['echo foo']}}}

    # write through the descriptor mkstemp() already opened instead of
    # discarding it, so it is closed rather than leaked
    with os.fdopen(fd, 'w') as conf_file:
        self.mrjob_conf = dump_mrjob_conf(conf, conf_file)
def test_tilde_in_include(self):
    # regression test for #1308
    #
    # Points HOME at the scratch dir, then checks that an include of
    # '~/mrjob.base.conf' loads the base conf from there.
    old_home = os.environ.get('HOME')
    os.environ['HOME'] = self.tmp_dir

    try:
        base_conf_path = os.path.join(self.tmp_dir, 'mrjob.base.conf')
        conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

        with open(base_conf_path, 'w') as f:
            dump_mrjob_conf({}, f)

        with open(conf_path, 'w') as f:
            dump_mrjob_conf({'include': '~/mrjob.base.conf'}, f)

        self.assertEqual(
            load_opts_from_mrjob_conf('foo', conf_path),
            [(base_conf_path, {}), (conf_path, {})])
    finally:
        # put HOME back so the override can't leak into other tests
        if old_home is None:
            os.environ.pop('HOME', None)
        else:
            os.environ['HOME'] = old_home
def make_mrjob_conf(self):
    """Write a conf for the ``inline`` runner to a fresh temp file.

    Sets ``self.mrjob_conf_path`` to the file created by
    ``tempfile.mkstemp()`` and ``self.mrjob_conf`` to whatever
    ``dump_mrjob_conf()`` returns.
    """
    # local import: only needed to wrap the descriptor from mkstemp()
    import os

    fd, self.mrjob_conf_path = tempfile.mkstemp(prefix='mrjob.conf.')

    # include one fake kwarg, and one real one
    conf = {'runners': {'inline': {'qux': 'quux',
                                   'setup_cmds': ['echo foo']}}}

    # write through the descriptor mkstemp() already opened instead of
    # discarding it, so it is closed rather than leaked
    with os.fdopen(fd, 'w') as conf_file:
        self.mrjob_conf = dump_mrjob_conf(conf, conf_file)
def test_symlink_to_duplicate_conf_path(self):
    # A conf and a symlink to it count as the same file: only one entry
    # comes back, under whichever path was listed last.
    real_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    with open(real_path, 'w') as conf_file:
        dump_mrjob_conf({}, conf_file)

    link_path = os.path.join(self.tmp_dir, 'mrjob.conf.symlink')
    os.symlink('mrjob.conf', link_path)

    cases = (
        ([real_path, link_path], link_path),
        ([link_path, real_path], real_path),
    )
    for requested, surviving_path in cases:
        self.assertEqual(
            load_opts_from_mrjob_confs('foo', requested),
            [(surviving_path, {})])
def test_unexpected_opt_from_mrjob_conf(self):
    # An unrecognized option in the conf's 'local' section should cause
    # a logged warning naming the option and the conf file.
    conf_path = self.makefile('mrjob.custom.conf')
    bogus_conf = {'runners': {'local': {'land': 'useless_swamp'}}}

    with open(conf_path, 'w') as conf_file:
        dump_mrjob_conf(bogus_conf, conf_file)

    job = MRTwoStepJob(['-r', 'local', '-c', conf_path])
    job.sandbox()

    with job.make_runner():
        self.assertTrue(self.log.warning.called)

        # first positional argument of every warning() call
        messages = [call[0][0] for call in self.log.warning.call_args_list]
        warning_text = '\n'.join(messages)

        for fragment in ('Unexpected option', 'land', conf_path):
            self.assertIn(fragment, warning_text)
def test_include_relative_to_real_path(self):
    # Loads a conf with a relative include twice: once directly, and
    # once through a symlink living in a different directory.
    conf_dir = os.path.join(self.tmp_dir, "conf")
    os.mkdir(conf_dir)

    base_path = os.path.join(conf_dir, "mrjob.base.conf")
    real_base_path = os.path.realpath(base_path)
    target_path = os.path.join(conf_dir, "mrjob.conf")
    link_path = os.path.join(self.tmp_dir, "mrjob.conf")

    with open(base_path, "w") as base_file:
        dump_mrjob_conf({}, base_file)
    with open(target_path, "w") as target_file:
        dump_mrjob_conf({"include": "mrjob.base.conf"}, target_file)

    os.symlink(os.path.join("conf", "mrjob.conf"), link_path)

    loaded_directly = load_opts_from_mrjob_conf("foo", target_path)
    self.assertEqual(
        loaded_directly, [(real_base_path, {}), (target_path, {})])

    # relative include should work from the symlink even though
    # it's not in the same directory as mrjob.base.conf
    loaded_via_link = load_opts_from_mrjob_conf("foo", link_path)
    self.assertEqual(
        loaded_via_link, [(real_base_path, {}), (link_path, {})])
def test_include_order_beats_include(self):
    # The top conf includes confs 1 and 2, and conf 1 itself includes
    # conf 2; the expected order follows the top conf's include list.
    top = os.path.join(self.tmp_dir, "mrjob.conf")
    first = os.path.join(self.tmp_dir, "mrjob.1.conf")
    second = os.path.join(self.tmp_dir, "mrjob.2.conf")

    files_to_write = (
        (top, {"include": [first, second]}),
        (first, {"include": [second]}),
        (second, {}),
    )
    for path, contents in files_to_write:
        with open(path, "w") as conf_file:
            dump_mrjob_conf(contents, conf_file)

    # shouldn't matter that conf_path_1 includes conf_path_2
    loaded = load_opts_from_mrjob_conf("foo", top)
    self.assertEqual(loaded, [(first, {}), (second, {}), (top, {})])
def make_tmp_dir_and_mrjob_conf(self):
    """Create a scratch directory holding an empty ``inline`` conf.

    Sets ``self.tmp_dir`` to a fresh temporary directory and
    ``self.mrjob_conf_path`` to the ``mrjob.conf`` file written inside it.
    The file declares the ``inline`` runner with no options.
    """
    self.tmp_dir = tempfile.mkdtemp()
    self.mrjob_conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    # use a context manager so the conf is flushed and the handle closed
    # before anything tries to read the file
    with open(self.mrjob_conf_path, 'w') as f:
        dump_mrjob_conf({'runners': {'inline': {}}}, f)
def save_conf(self, name, conf):
    """Write *conf* to a file called *name* inside ``self.tmp_dir``.

    Returns the full path of the file that was written.
    """
    path = os.path.join(self.tmp_dir, name)

    with open(path, 'w') as conf_file:
        dump_mrjob_conf(conf, conf_file)

    return path
def updateMRJobConf(conf_path='/Users/kykamath/.mrjob',
                    python_archives=None):
    """Write an mrjob conf that sets ``python_archives`` for hadoop.

    conf_path -- file to (over)write; defaults to the path this function
        originally had hard-coded.
    python_archives -- iterable of archive paths to store under the
        ``hadoop`` runner; defaults to the single archive this function
        originally had hard-coded.

    Calling with no arguments behaves exactly as before.
    """
    if python_archives is None:
        python_archives = [
            '/home/kykamath/projects/library/dist/my_library-1.0.tar.gz']

    conf = {'runners': {
        'hadoop': {'python_archives': list(python_archives)},
    }}

    with open(conf_path, 'w') as f:
        dump_mrjob_conf(conf, f)
def make_tmp_dir_and_mrjob_conf(self):
    """Create a scratch directory holding an empty ``inline`` conf.

    Sets ``self.tmp_dir`` to a fresh temporary directory and
    ``self.mrjob_conf_path`` to the ``mrjob.conf`` file written inside it.
    The file declares the ``inline`` runner with no options.
    """
    self.tmp_dir = tempfile.mkdtemp()
    self.mrjob_conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    conf = {'runners': {
        'inline': {},
    }}

    # use a context manager so the conf is flushed and the handle closed
    # before anything tries to read the file
    with open(self.mrjob_conf_path, 'w') as f:
        dump_mrjob_conf(conf, f)
def test_round_trip(self):
    # A conf written with dump_mrjob_conf() should come back unchanged
    # from load_mrjob_conf().
    conf = {'runners': {'foo': {'qux': 'quux'}}}
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    # close the file before loading it, so the contents are guaranteed
    # to have been flushed to disk
    with open(conf_path, 'w') as f:
        dump_mrjob_conf(conf, f)

    assert_equal(conf, load_mrjob_conf(conf_path=conf_path))