def setUp(self):
    # if save_sys_std() *doesn't* work, don't mess up other tests
    super(SaveSysStdTestCase, self).setUp()

    self.stdin = self.start(patch('sys.stdin'))
    self.stdout = self.start(patch('sys.stdout'))
    self.stderr = self.start(patch('sys.stderr'))

def test_logging_stderr_in_cleanup(self):
    def mock_Popen(*args, **kwargs):
        mock_proc = MagicMock()

        mock_proc.stdout = MagicMock()
        mock_proc.stdout.__iter__.return_value = [
            b'line1\n', b'line2\n']

        mock_proc.stderr = MagicMock()
        mock_proc.stderr.__iter__.return_value = [
            b'Emergency, everybody to get from street\n']

        mock_proc.wait.return_value = 0

        return mock_proc

    self.start(patch('mrjob.fs.hadoop.Popen', mock_Popen))

    mock_log = self.start(patch('mrjob.fs.hadoop.log'))

    fs = HadoopFilesystem()

    data = b''.join(fs._cat_file('/some/path'))
    self.assertEqual(data, b'line1\nline2\n')

    mock_log.error.assert_called_once_with(
        'STDERR: Emergency, everybody to get from street')

def setUp(self):
    super(InterpretEMRStepStderrTestCase, self).setUp()

    # instead of mocking out contents of files, just mock out
    # what _parse_step_syslog() should return, and have
    # _cat_log() just pass through the path
    self.mock_paths = []
    self.path_to_mock_result = {}
    self.mock_paths_catted = []

    def mock_cat_log(fs, path):
        if path in self.mock_paths:
            self.mock_paths_catted.append(path)
            return path

    def mock_parse_task_stderr(path_from_mock_cat_log):
        return self.path_to_mock_result.get(path_from_mock_cat_log)

    # need to mock ls so that _ls_task_syslogs() can work
    def mock_exists(path):
        return path in self.mock_paths

    def mock_ls(log_dir):
        return self.mock_paths

    self.mock_fs = Mock()
    self.mock_fs.ls = Mock(side_effect=mock_ls)

    self.mock_cat_log = self.start(
        patch("mrjob.logs.step._cat_log", side_effect=mock_cat_log))

    self.start(patch("mrjob.logs.step._parse_task_stderr",
                     side_effect=mock_parse_task_stderr))

def setUp(self):
    super(AuditUsageTestCase, self).setUp()

    self.repeat_sleep = self.start(patch('time.sleep'))

    # this is called once per cluster (no pagination), so we can
    # test quantity as well as whether it was called
    self.describe_cluster_sleep = self.start(
        patch('mrjob.tools.emr.audit_usage.sleep'))

def setUp(self):
    super(StepPickingTestCase, self).setUp()

    self.pick_error = self.start(
        patch('mrjob.emr.EMRJobRunner._pick_error',
              side_effect=StopIteration))
    self.log = self.start(
        patch('mrjob.tools.diagnose.log'))

def setUp(self):
    super(InterpretTaskLogsTestCase, self).setUp()

    self.runner._ls_task_logs = Mock()

    self._interpret_task_logs = (
        self.start(patch('mrjob.logs.mixin._interpret_task_logs')))
    self._interpret_spark_task_logs = (
        self.start(patch('mrjob.logs.mixin._interpret_spark_task_logs')))

    self.runner.get_hadoop_version = Mock(return_value='2.7.1')

def setUp(self):
    super(LsTaskLogsTestCase, self).setUp()

    self._ls_task_logs = self.start(patch(
        'mrjob.logs.mixin._ls_task_logs'))
    self._ls_spark_task_logs = self.start(patch(
        'mrjob.logs.mixin._ls_spark_task_logs'))

    self.runner._stream_task_log_dirs = Mock()

def setUp(self):
    super(JoinTestCase, self).setUp()

    # os.path.join() and posixpath.join() do the same thing on
    # UNIX and OS X, so track which one we called
    self.start(patch('os.path.join', wraps=os.path.join))
    self.start(patch('posixpath.join', wraps=posixpath.join))

    self.fs = Filesystem()

def setUp(self):
    super(DeprecatedSwitchesTestCase, self).setUp()

    self._maybe_terminate_clusters = self.start(patch(
        'mrjob.tools.emr.terminate_idle_clusters.'
        '_maybe_terminate_clusters'))

    self.log = self.start(
        patch('mrjob.tools.emr.terminate_idle_clusters.log'))

def setUp(self):
    super(InterpretEMRBootstrapStderrTestCase, self).setUp()

    self.mock_fs = Mock()

    self.mock_parse_task_stderr = self.start(
        patch('mrjob.logs.bootstrap._parse_task_stderr',
              return_value=dict(message='BOOM!\n')))

    self.mock_cat_log = self.start(patch('mrjob.logs.bootstrap._cat_log'))

def setUp(self):
    super(SortBinTestCase, self).setUp()

    # these patches are only okay if they don't raise an exception;
    # otherwise that hands an un-pickleable stacktrace to multiprocessing
    self.check_call = self.start(patch(
        'mrjob.local.check_call', wraps=check_call))
    self._sort_lines_in_memory = self.start(patch(
        'mrjob.local._sort_lines_in_memory', wraps=_sort_lines_in_memory))

def setUp(self):
    super(StepPickingTestCase, self).setUp()

    self.pick_error = self.start(
        patch('mrjob.emr.EMRJobRunner._pick_error',
              side_effect=StopIteration))
    self.log = self.start(
        patch('mrjob.tools.diagnose.log'))

    # don't print logging messages when we start the diagnose tool
    self.log_to_stream = self.start(
        patch('mrjob.launch.log_to_stream'))

def setUp(self):
    super(WrapAWSClientTestCase, self).setUp()

    # don't actually wait between retries
    self.sleep = self.start(patch('time.sleep'))

    self.log = self.start(patch('mrjob.retry.log'))

    self.list_buckets = self.start(patch(
        'tests.mock_boto3.s3.MockS3Client.list_buckets',
        side_effect=[dict(Buckets=[])]))

    self.client = self.client('s3')
    self.wrapped_client = _wrap_aws_client(self.client)

def setUp(self):
    super(SparkPyFilesTestCase, self).setUp()

    # don't bother actually running spark
    self.start(patch(
        'mrjob.spark.runner.SparkMRJobRunner._run_spark_submit',
        return_value=0))

def test_get_location_is_forbidden(self):
    self.add_mock_s3_data({'walrus': {}}, location='us-west-2')
    fs = S3Filesystem()

    access_denied_error = ClientError(
        dict(
            Error=dict(
                Code='AccessDenied',
                Message='Access Denied',
            ),
            ResponseMetadata=dict(
                HTTPStatusCode=403,
            ),
        ),
        'GetBucketLocation')

    with patch(
            'tests.mock_boto3.s3.MockS3Client.get_bucket_location',
            side_effect=access_denied_error):
        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.meta.client.meta.endpoint_url,
                         'https://s3.amazonaws.com')
        self.assertEqual(bucket.meta.client.meta.region_name,
                         'us-east-1')

def setUp(self):
    self._dataproc_client = MockDataprocClient(self)
    self._gcs_client = MockGCSClient(self)
    self._gcs_fs = self._gcs_client._fs

    self.start(patch.object(
        DataprocJobRunner, 'api_client', self._dataproc_client))

    self.gcs_patch_api_client = patch.object(
        GCSFilesystem, 'api_client', self._gcs_client)
    self.gcs_patch_download_io = patch.object(
        GCSFilesystem, '_download_io', self._gcs_client.download_io)
    self.gcs_patch_upload_io = patch.object(
        GCSFilesystem, '_upload_io', self._gcs_client.upload_io)

    self.start(self.gcs_patch_api_client)
    self.start(self.gcs_patch_download_io)
    self.start(self.gcs_patch_upload_io)

    self.start(patch('mrjob.dataproc._read_gcloud_config',
                     lambda: _GCLOUD_CONFIG))

    super(MockGoogleAPITestCase, self).setUp()

    # patch slow things
    def fake_create_mrjob_tar_gz(mocked_self, *args, **kwargs):
        mocked_self._mrjob_tar_gz_path = self.fake_mrjob_tgz_path
        return self.fake_mrjob_tgz_path

    self.start(patch.object(
        DataprocJobRunner, '_create_mrjob_tar_gz',
        fake_create_mrjob_tar_gz))

    self.start(patch.object(time, 'sleep'))

def setUp(self):
    super(LsLogsTestCase, self).setUp()

    self.mock_fs = Mock()
    self.mock_paths = []

    def mock_fs_ls(log_dir):
        prefix = log_dir.rstrip('/') + '/'

        exists = False

        for p in self.mock_paths:
            if isinstance(p, Exception):
                raise p
            elif p.startswith(prefix):
                yield p
                exists = True

        if not exists:
            raise IOError

    def mock_fs_exists(log_dir):
        return any(mock_fs_ls(log_dir))

    self.mock_fs.ls = Mock(side_effect=mock_fs_ls)
    self.mock_fs.exists = Mock(side_effect=mock_fs_exists)

    # a matcher that cheerfully passes through kwargs
    def mock_matcher(path, **kwargs):
        return dict(**kwargs)

    self.mock_matcher = Mock(side_effect=mock_matcher)

    self.log = self.start(patch('mrjob.logs.wrap.log'))

def test_infer_from_hadoop_bin_realpath(self):
    with patch('posixpath.realpath', return_value='/ha/do/op/bin'):
        self.runner = HadoopJobRunner(hadoop_bin=['/usr/bin/hadoop'])
        self.mock_paths.append('/ha/do/op/hadoop-streaming.jar')

        self.assertEqual(self.runner._find_hadoop_streaming_jar(),
                         '/ha/do/op/hadoop-streaming.jar')

def setUp(self):
    super(InterpretTaskLogsTestCase, self).setUp()

    # instead of mocking out contents of files, just mock out
    # what _parse_task_{syslog,stderr}() should return, and have
    # _cat_log_lines() just pass through the path
    self.mock_paths = []
    self.path_to_mock_result = {}
    self.mock_log_callback = Mock()
    self.mock_paths_catted = []

    def mock_cat_log_lines(fs, path):
        if path in self.mock_paths:
            self.mock_paths_catted.append(path)
            return path

    # (the actual log-parsing functions take lines from the log)
    def mock_parse_task_syslog(path_from_mock_cat_log_lines):
        # default is {}
        return self.path_to_mock_result.get(
            path_from_mock_cat_log_lines, {})

    def mock_parse_task_stderr(path_from_mock_cat_log_lines):
        # default is None
        return self.path_to_mock_result.get(path_from_mock_cat_log_lines)

    def mock_exists(path):
        return path in self.mock_paths or path == 'MOCK_LOG_DIR'

    # need to mock ls so that _ls_task_logs() can work
    def mock_ls(log_dir):
        return self.mock_paths

    self.mock_fs = Mock()
    self.mock_fs.exists = Mock(side_effect=mock_exists)
    self.mock_fs.ls = Mock(side_effect=mock_ls)

    self.mock_cat_log_lines = self.start(
        patch('mrjob.logs.task._cat_log_lines',
              side_effect=mock_cat_log_lines))

    self.start(patch('mrjob.logs.task._parse_task_syslog',
                     side_effect=mock_parse_task_syslog))
    self.start(patch('mrjob.logs.task._parse_task_stderr',
                     side_effect=mock_parse_task_stderr))

def setUp(self):
    super(MRBossTestCase, self).setUp()

    self.ssh_worker_hosts = self.start(patch(
        'mrjob.emr.EMRJobRunner._ssh_worker_hosts',
        return_value=[]))

    self.make_runner()

def test_option_debug_printout(self):
    log = self.start(patch('mrjob.runner.log'))

    InlineMRJobRunner(owner='dave')

    debug = ''.join(a[0] + '\n' for a, kw in log.debug.call_args_list)

    self.assertIn("'owner'", debug)
    self.assertIn("'dave'", debug)

def setUp(self):
    super(PickErrorsTestCase, self).setUp()

    self.runner._interpret_history_log = Mock()
    self.runner._interpret_step_logs = Mock()
    self.runner._interpret_task_logs = Mock()

    self._pick_error = self.start(
        patch('mrjob.logs.mixin._pick_error'))

def setUp(self):
    super(CatLogsTestCase, self).setUp()

    self.mock_data = None

    self.mock_fs = Mock()
    self.mock_fs.cat = Mock(return_value=())
    self.mock_fs.exists = Mock(return_value=True)

    self.mock_log = self.start(patch('mrjob.logs.wrap.log'))

def setUp(self):
    super(InterpretHistoryLogTestCase, self).setUp()

    self.mock_fs = Mock()

    # don't include errors in return value, as they get patched
    mock_return_value = dict(
        counters={'foo': {'bar': 42}},
        errors=[])

    self.mock_parse_yarn_history_log = self.start(
        patch('mrjob.logs.history._parse_yarn_history_log',
              return_value=mock_return_value))
    self.mock_parse_pre_yarn_history_log = self.start(
        patch('mrjob.logs.history._parse_pre_yarn_history_log',
              return_value=mock_return_value))

    self.mock_cat_log = self.start(patch('mrjob.logs.history._cat_log'))

def test_disable_check_input_paths(self):
    missing_data = os.path.join(self.tmp_dir, 'data')
    job = MRWordCount(['--no-check-input-paths', missing_data])

    self.start(patch('mrjob.inline.InlineMRJobRunner._run',
                     side_effect=StopIteration))

    with job.make_runner() as runner:
        self.assertRaises(StopIteration, runner.run)

def setUp(self):
    super(StreamingLogDirsTestCase, self).setUp()

    self.log = self.start(patch('mrjob.hadoop.log'))

    self.runner = HadoopJobRunner()
    self.runner._hadoop_log_dirs = Mock(return_value=[])
    self.runner.fs.exists = Mock(return_value=True)

    self.log.reset_mock()  # ignore logging from HadoopJobRunner init

def test_hadoop_home_regression(self):
    # kill $HADOOP_HOME if it exists
    try:
        del os.environ['HADOOP_HOME']
    except KeyError:
        pass

    with patch('mrjob.hadoop.find_hadoop_streaming_jar',
               return_value='some.jar'):
        HadoopJobRunner(hadoop_home=self.tmp_dir, conf_paths=[])

def test_explicit_spark_tmp_dir_path(self):
    # posixpath.join() and os.path.join() are the same on UNIX
    self.start(patch('os.path.join', lambda *paths: '/./'.join(paths)))

    runner = SparkMRJobRunner(spark_tmp_dir='/path/to/tmp')

    self.assertTrue(runner._spark_tmp_dir.startswith('/path/to/tmp/./'))
    self.assertGreater(len(runner._spark_tmp_dir), len('/path/to/tmp/./'))

    self.assertIsNone(runner._upload_mgr)

def test_get_location_other_error(self):
    self.add_mock_s3_data({'walrus': {}}, location='us-west-2')
    fs = S3Filesystem()

    with patch(
            'tests.mockboto.MockBucket.get_location',
            side_effect=boto.exception.S3ResponseError(404, 'Not Found')):
        self.assertRaises(boto.exception.S3ResponseError,
                          fs.get_bucket, 'walrus')

def setUp(self):
    super(FindHadoopBinTestCase, self).setUp()

    # track calls to which()
    self.which = self.start(patch('mrjob.fs.hadoop.which', wraps=which))

    # keep which() from searching in /bin, etc.
    os.environ['PATH'] = self.tmp_dir

    # create basic HadoopFilesystem (okay to overwrite)
    self.fs = HadoopFilesystem()

def setUp(self):
    super(ParseOutputLine, self).setUp()

    self.log = self.start(patch('mrjob.job.log'))

def setUp(self):
    super(LsHistoryLogsTestCase, self).setUp()

    self._ls_history_logs = self.start(
        patch('mrjob.logs.mixin._ls_history_logs'))

    self.runner._stream_history_log_dirs = Mock()

def test_empty(self):
    with patch('getpass.getuser') as getuser:
        getuser.return_value = 'dave'
        self.assertEqual(fully_qualify_hdfs_path(''),
                         'hdfs:///user/dave/')

def setUp(self):
    super(DeprecatedReadFileTestCase, self).setUp()

    self.start(patch('mrjob.util.log'))

def setUp(self):
    super(GCSFilesystemInitTestCase, self).setUp()

    self.log = self.start(patch('mrjob.fs.gcs.log'))
    self.Client = self.start(patch('google.cloud.storage.client.Client'))

def setUp(self):
    super(TranslateJobConfDictTestCase, self).setUp()

    self.log = self.start(patch('mrjob.compat.log'))

def setUp(self):
    super(UnexpectedOptsWarningTestCase, self).setUp()

    self.log = self.start(patch('mrjob.runner.log'))

def setUp(self):
    super(SparkSubmitLocallyTestCase, self).setUp()

    # don't set up logging
    self.set_up_logging = self.start(
        patch('mrjob.job.MRJob.set_up_logging'))

def setUp(self):
    super(DeprecatedOptionHooksTestCase, self).setUp()

    self.start(patch('mrjob.launch.log'))

def setUp(self):
    super(PrintHelpTestCase, self).setUp()

    self.exit = self.start(patch('sys.exit'))
    self.stdout = self.start(patch.object(sys, 'stdout', StringIO()))

def setUp(self):
    super(S3FSTestCase, self).setUp()

    self.fs = S3Filesystem()

    self.TransferConfig = self.start(
        patch('boto3.s3.transfer.TransferConfig'))

def setUp(self):
    super(SingleSparkContextTestCase, self).setUp()

    self.start(
        patch('pyspark.SparkContext', return_value=self.spark_context))

def setUp(self):
    super(ParseIndentedCountersTestCase, self).setUp()

    self.log = self.start(patch('mrjob.logs.step.log'))

def monkey_patch_argv(self, *args):
    p = patch('sys.argv', [sys.argv[0]] + list(args))
    self.addCleanup(p.stop)
    p.start()

def monkey_patch_stderr(self):
    p = patch('sys.stderr', mock_stdout_or_stderr())
    self.addCleanup(p.stop)
    p.start()

def setUp(self):
    super(LsTaskSyslogsTestCase, self).setUp()

    self._ls_task_syslogs = self.start(
        patch('mrjob.logs.mixin._ls_task_syslogs'))

    self.runner._stream_task_log_dirs = Mock()

def setUp(self):
    super(RegionAndZoneOptsTestCase, self).setUp()

    self.log = self.start(patch('mrjob.dataproc.log'))

def setUp(self):
    super(PassStepsToRunnerTestCase, self).setUp()

    self.log = self.start(patch('mrjob.runner.log'))

def test_relative_path(self):
    with patch('getpass.getuser') as getuser:
        getuser.return_value = 'dave'
        self.assertEqual(fully_qualify_hdfs_path('path/to/chocolate'),
                         'hdfs:///user/dave/path/to/chocolate')

def setUp(self):
    super(SparkTmpDirTestCase, self).setUp()

    self.log = self.start(patch('mrjob.spark.runner.log'))

def setUp(self):
    self.runner = self.MockRunner()
    self.log = self.start(patch('mrjob.logs.mixin.log'))

def setUp(self):
    super(GroupStepsTestCase, self).setUp()

    self.run_step_on_spark = self.start(patch(
        'mrjob.spark.runner.SparkMRJobRunner._run_step_on_spark'))

def patch_fs_s3():
    m_boto = MagicMock()
    m_s3 = m_boto.connect_s3()
    m_s3.get_all_buckets.__name__ = 'get_all_buckets'

    return patch('mrjob.fs.s3.boto', m_boto)

def setUp(self):
    super(InterpretHistoryLogTestCase, self).setUp()

    self.runner._ls_history_logs = Mock()

    self._interpret_history_log = (self.start(
        patch('mrjob.logs.mixin._interpret_history_log')))

def setUp(self):
    super(NumCoresTestCase, self).setUp()

    self.pool = self.start(patch('mrjob.local.Pool', wraps=Pool))

def setUp(self):
    super(LocalMRJobRunnerEndToEndTestCase, self).setUp()

    self.start(patch('os.symlink', side_effect=OSError))

def setUp(self):
    super(MultipleConfigFilesMachineryTestCase, self).setUp()

    self.log = self.start(patch('mrjob.conf.log'))

def setUp(self):
    super(MockFilesystemsTestCase, self).setUp()

    self.log = self.start(patch('mrjob.spark.runner.log'))

def setUp(self):
    super(MRJobConfNoYAMLTestCase, self).setUp()

    self.start(patch('mrjob.conf.yaml', None))

def setUp(self):
    super(DeprecatedFileUploadArgsTestCase, self).setUp()

    self.log = self.start(patch('mrjob.runner.log'))