def test_spark_with_step_num(self):
    job = MRJob(['--step-num=1', '--spark', 'input_dir', 'output_dir'])

    mapper = MagicMock()
    spark = MagicMock()

    job.steps = Mock(
        return_value=[MRStep(mapper=mapper), SparkStep(spark)])

    job.execute()

    spark.assert_called_once_with('input_dir', 'output_dir')
    self.assertFalse(mapper.called)
def mock_Popen(*args, **kwargs):
    mock_proc = MagicMock()

    mock_proc.stdout = MagicMock()
    mock_proc.stdout.__iter__.return_value = [
        b'line1\n', b'line2\n']

    mock_proc.stderr = MagicMock()
    mock_proc.stderr.__iter__.return_value = [
        b'Emergency, everybody to get from street\n']

    mock_proc.wait.return_value = 0

    return mock_proc
def test_spark(self):
    job = MRJob(['--spark', 'input_dir', 'output_dir'])
    job.spark = MagicMock()

    job.execute()

    job.spark.assert_called_once_with('input_dir', 'output_dir')
def test_wrong_type_of_step(self):
    mr_job = MRJob()
    mr_job.spark = MagicMock()

    self.assertRaises(TypeError, mr_job.run_mapper)
    self.assertRaises(TypeError, mr_job.run_combiner)
    self.assertRaises(TypeError, mr_job.run_reducer)
def test_spark_method(self):
    j = MRJob(['--no-conf'])
    j.spark = MagicMock()

    self.assertEqual(j.steps(), [SparkStep(j.spark)])

    self.assertEqual(j._steps_desc(),
                     [dict(type='spark', spark_args=[])])
def prepare_runner_for_ssh(self, runner, num_workers=0):
    # TODO: Refactor this abomination of a test harness

    # Set up environment variables
    os.environ['MOCK_SSH_VERIFY_KEY_FILE'] = 'true'

    # Create temporary directories and add them to MOCK_SSH_ROOTS
    master_ssh_root = tempfile.mkdtemp(prefix='master_ssh_root.')
    os.environ['MOCK_SSH_ROOTS'] = 'testmaster=%s' % master_ssh_root
    mock_ssh_dir('testmaster', _EMR_LOG_DIR + '/hadoop/history')

    if not hasattr(self, 'worker_ssh_roots'):
        self.worker_ssh_roots = []

    self.addCleanup(self.teardown_ssh, master_ssh_root)

    # Make the fake binary
    os.mkdir(os.path.join(master_ssh_root, 'bin'))
    self.ssh_bin = os.path.join(master_ssh_root, 'bin', 'ssh')
    create_mock_ssh_script(self.ssh_bin)

    self.ssh_add_bin = os.path.join(master_ssh_root, 'bin', 'ssh-add')
    create_mock_ssh_script(self.ssh_add_bin)

    # Make a fake keyfile so that the 'file exists' requirements are
    # satisfied
    self.keyfile_path = os.path.join(master_ssh_root, 'key.pem')
    with open(self.keyfile_path, 'w') as f:
        f.write('I AM DEFINITELY AN SSH KEY FILE')

    # Tell the runner to use the fake binary
    runner._opts['ssh_bin'] = [self.ssh_bin]
    runner._opts['ssh_add_bin'] = [self.ssh_add_bin]

    # Also pretend to have an SSH key pair file
    runner._opts['ec2_key_pair_file'] = self.keyfile_path

    # use fake hostname
    runner._address_of_master = MagicMock(return_value='testmaster')
    runner._master_private_ip = MagicMock(return_value='172.172.172.172')

    # re-initialize fs
    runner._fs = None
def _make_launcher(self, *args):
    """Make a launcher, add a mock runner (``launcher.mock_runner``), and
    set it up so that ``launcher.make_runner().__enter__()`` returns
    ``launcher.mock_runner()``.
    """
    launcher = MRJobLauncher(args=['--no-conf', ''] + list(args))
    launcher.sandbox()

    launcher.mock_runner = Mock()
    launcher.mock_runner.stream_output.return_value = [b'a line\n']

    launcher.make_runner = MagicMock()  # include __enter__
    launcher.make_runner.return_value.__enter__.return_value = (
        launcher.mock_runner)

    return launcher
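# Hypothetical usage sketch (not from the original suite; the test name is
# made up): a test can grab the launcher from _make_launcher() and check
# that make_runner() used as a context manager hands back the mock runner
# along with its canned output.
def test_make_launcher_returns_mock_runner(self):
    launcher = self._make_launcher()

    with launcher.make_runner() as runner:
        self.assertIs(runner, launcher.mock_runner)
        self.assertEqual(list(runner.stream_output()), [b'a line\n'])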
def setUp(self):
    super(SparkSubmitToolTestCase, self).setUp()

    self.runner_class = None
    self.runner = MagicMock()

    def _mock_runner_class(runner_alias):
        rc = _runner_class(runner_alias)

        self.runner_class = Mock()
        self.runner_class.return_value = self.runner
        self.runner_class.alias = rc.alias
        self.runner_class.OPT_NAMES = rc.OPT_NAMES

        return self.runner_class

    self.runner_class = self.start(
        patch('mrjob.tools.spark_submit._runner_class',
              side_effect=_mock_runner_class))

    self.runner_log = self.start(patch('mrjob.runner.log'))

    # don't actually want to exit after printing help
    self.exit = self.start(patch('sys.exit', side_effect=MockSystemExit))

    # don't set up logging
    self.set_up_logging = self.start(
        patch('mrjob.job.MRJob.set_up_logging'))

    # save printout, rather than actually printing
    self.printout = ''

    def _mock_print_message(self_, message, file=None):
        self.printout += message

    self.start(
        patch('argparse.ArgumentParser._print_message',
              _mock_print_message))

    def _mock_print(s=''):
        self.printout += s + '\n'

    # print() isn't considered part of the module in Python 3.4
    self.start(
        patch('mrjob.tools.spark_submit.print', _mock_print, create=True))
def mock_Popen(*args, **kwargs):
    mock_proc = MagicMock()

    mock_proc.stdout = MagicMock()
    mock_proc.stdout.__iter__.return_value = [b'line1\n', b'line2\n']

    mock_proc.stderr = MagicMock()
    mock_proc.stderr.__iter__.return_value = [
        b'Emergency, everybody to get from street\n']

    mock_proc.wait.return_value = 0

    return mock_proc
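# Hypothetical usage sketch (not part of the original suite; the test name
# and the choice to patch subprocess.Popen directly are assumptions): with
# Popen patched to mock_Popen, code under test sees the canned stdout and
# stderr lines and a zero return code.
def test_mock_popen_canned_output(self):
    import subprocess

    with patch('subprocess.Popen', side_effect=mock_Popen):
        proc = subprocess.Popen(['ls'])

        self.assertEqual(list(proc.stdout), [b'line1\n', b'line2\n'])
        self.assertEqual(
            list(proc.stderr),
            [b'Emergency, everybody to get from street\n'])
        self.assertEqual(proc.wait(), 0)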
def setUpClass(cls):
    super(SingleSparkContextTestCase, cls).setUpClass()

    if not PY2:
        # ignore Python 3 warnings about unclosed filehandles
        filterwarnings('ignore', category=ResourceWarning)

    from pyspark import SparkContext
    cls.spark_context = SparkContext()

    # move stop() so that scripts can't call it
    cls.spark_context.really_stop = cls.spark_context.stop
    cls.spark_context.stop = MagicMock()

    try:
        cls.spark_context.setLogLevel('FATAL')
    except:
        # tearDownClass() won't be called if there's an exception
        cls.spark_context.really_stop()
        raise
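# Hypothetical companion sketch (the matching tearDownClass() isn't shown
# in this excerpt): because setUpClass() replaces stop() with a MagicMock
# and stashes the real implementation as really_stop(), a teardown would
# presumably shut the shared context down through really_stop().
@classmethod
def tearDownClass(cls):
    cls.spark_context.really_stop()
    super(SingleSparkContextTestCase, cls).tearDownClass()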
def test_too_few_args(self):
    job = MRJob(['--spark'])
    job.spark = MagicMock()

    self.assertRaises(ValueError, job.execute)
def test_wrong_step_num(self):
    job = MRJob(['--step-num=1', '--spark', 'input_dir', 'output_dir'])
    job.spark = MagicMock()

    self.assertRaises(ValueError, job.execute)
def test_wrong_step_type(self):
    job = MRJob(['--spark', 'input_dir', 'output_dir'])
    job.mapper = MagicMock()

    self.assertRaises(TypeError, job.execute)
def test_spark_args_ignored_without_spark(self):
    j = MRJob(['--no-conf'])
    j.reducer = MagicMock()
    j.spark_args = MagicMock(spark_args=['argh', 'ARRRRGH!'])

    self.assertEqual(j.steps(), [MRStep(reducer=j.reducer)])
def test_spark_and_streaming_dont_mix(self):
    j = MRJob(['--no-conf'])
    j.mapper = MagicMock()
    j.spark = MagicMock()

    self.assertRaises(ValueError, j.steps)
def test_too_many_args(self):
    job = MRJob(['--spark', 'input_dir', 'output_dir', 'error_dir'])
    job.spark = MagicMock()

    self.assertRaises(ValueError, job.execute)
def patch_fs_s3():
    m_boto = MagicMock()
    m_s3 = m_boto.connect_s3()
    m_s3.get_all_buckets.__name__ = 'get_all_buckets'

    return patch('mrjob.fs.s3.boto', m_boto)
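# Hypothetical usage sketch (not from the original suite; the test name is
# made up): patch_fs_s3() returns a patcher, so it works as a context
# manager (or via .start()/.stop()); inside the block, mrjob.fs.s3 sees
# the mocked boto module instead of the real one.
def test_patch_fs_s3_swaps_in_mock_boto(self):
    import mrjob.fs.s3

    with patch_fs_s3() as m_boto:
        self.assertIs(mrjob.fs.s3.boto, m_boto)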