def test_non_log_lines(self):
    """Leading non-log lines are skipped, one warning apiece."""
    log_input = StringIO(
        'foo\n'
        'bar\n'
        '15/12/11 13:26:08 ERROR streaming.StreamJob:'
        ' Error Launching job :'
        ' Output directory already exists\n'
        'Streaming Command Failed!')

    with no_handlers_for_logger('mrjob.logs.parse'):
        captured = StringIO()
        log_to_stream('mrjob.logs.parse', captured)

        # ignore leading non-log lines
        expected_record = dict(
            timestamp='15/12/11 13:26:08',
            level='ERROR',
            logger='streaming.StreamJob',
            thread=None,
            # no way to know that Streaming Command Failed! wasn't part
            # of a multi-line message
            message=('Error Launching job :'
                     ' Output directory already exists\n'
                     'Streaming Command Failed!'))

        self.assertEqual(
            list(_parse_hadoop_log_lines(log_input)), [expected_record])

        # should be one warning for each leading non-log line
        warning_lines = captured.getvalue().splitlines()
        self.assertEqual(len(warning_lines), 2)
def test_deprecated_mapper_final_positional_arg(self):
    """mapper_final as a third positional arg still works, but warns."""
    def mapper(k, v):
        pass

    def reducer(k, v):
        pass

    def mapper_final():
        pass

    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', captured)
        step = MRJob.mr(mapper, reducer, mapper_final)

        # should be allowed to specify mapper_final as a positional arg,
        # but we log a warning
        self.assertEqual(
            step,
            MRJob.mr(mapper=mapper, reducer=reducer,
                     mapper_final=mapper_final))
        self.assertIn('mapper_final should be specified',
                      captured.getvalue())

        # can't specify mapper_final as a positional and keyword arg
        self.assertRaises(
            TypeError, MRJob.mr,
            mapper, reducer, mapper_final, mapper_final=mapper_final)
def test_non_log_lines(self):
    """Non-log preamble lines are dropped and warned about."""
    raw = StringIO(
        "foo\n"
        "bar\n"
        "15/12/11 13:26:08 ERROR streaming.StreamJob:"
        " Error Launching job :"
        " Output directory already exists\n"
        "Streaming Command Failed!"
    )

    with no_handlers_for_logger("mrjob.logs.parse"):
        log_output = StringIO()
        log_to_stream("mrjob.logs.parse", log_output)

        parsed = list(_parse_hadoop_log_lines(raw))

        # ignore leading non-log lines; no way to know that
        # Streaming Command Failed! wasn't part of a multi-line message
        self.assertEqual(
            parsed,
            [
                dict(
                    timestamp="15/12/11 13:26:08",
                    level="ERROR",
                    logger="streaming.StreamJob",
                    thread=None,
                    message=(
                        "Error Launching job :"
                        " Output directory already exists\n"
                        "Streaming Command Failed!"
                    ),
                )
            ],
        )

        # should be one warning for each leading non-log line
        self.assertEqual(len(log_output.getvalue().splitlines()), 2)
def test_failed_job(self):
    """A job whose step hits ERROR raises, and its cluster gets deleted."""
    job = MRTwoStepJob(['-r', 'dataproc', '-v'])
    job.sandbox()

    with no_handlers_for_logger('mrjob.dataproc'):
        captured = StringIO()
        log_to_stream('mrjob.dataproc', captured)

        self._dataproc_client.job_get_advances_states = (
            collections.deque(['SETUP_DONE', 'RUNNING', 'ERROR']))

        with job.make_runner() as runner:
            self.assertIsInstance(runner, DataprocJobRunner)
            self.assertRaises(StepFailedException, runner.run)
            self.assertIn(' => ERROR\n', captured.getvalue())
            cluster_id = runner.get_cluster_id()

    # job should get terminated
    cluster = (
        self._dataproc_client._cache_clusters[_TEST_PROJECT][cluster_id])
    self.assertEqual(
        self._dataproc_client.get_state(cluster), 'DELETING')
def assert_hadoop_version(self, JobClass, version_string):
    """Assert JobClass yields the given hadoop_version and logs a warning."""
    job = JobClass()
    captured = StringIO()
    with no_handlers_for_logger("mrjob.job"):
        log_to_stream("mrjob.job", captured)
        jobconf = job.jobconf()
        self.assertEqual(jobconf["hadoop_version"], version_string)
        self.assertIn("should be a string", captured.getvalue())
def main():
    # Entry point: parse command-line args, open the link server, and run
    # influence estimation over the configured seed sets.
    global output_file
    global total_steps_cap
    global csv_file
    global generate_database
    global edge_prob_type
    global sql_mode
    global seeds_file
    global probs
    parameters = parser.parse_args()
    # unpack the parsed options into locals (and the globals above)
    csv_file, db_name, generate_database, edge_prob_type, sql_mode, res_fname, seeds, bfs_method, cores, output_mode, scale = parameters.csv, parameters.dataset, \
        parameters.generate_database, parameters.edge_prob_type, parameters.sql, parameters.res_fname, parameters.seeds, parameters.bfs_method, parameters.cores, parameters.output_mode, parameters.scale
    if bfs_method == 'emr':
        # EMR runs need mrjob-style logging configured up front
        print_out("setting up logging", output_mode)
        log_to_stream()
        print_out("done", output_mode)
    print_out('Evaluation algorithm. Dataset: %s' % (db_name), output_mode)
    link_server = LinkServerCP(db_name, undirected=parameters.undirected)
    if bfs_method == 'seq':
        # sequential mode: load pickled seed sets and estimate each one
        print_out('Loading seeds set', output_mode)
        seeds_sets = cp.load(open(seeds, 'r'))
        for i in xrange(len(seeds_sets)):
            EstimateInfluence(link_server, bfs_method, parameters.tau_scale, seeds_sets[i], res_fname,\
                cores, output_mode,scale, parameters.output_results,\
                init_tau = parameters.init_tau, iter_samples = parameters.iter_samples)
    else:
        # NOTE(review): seeds_sets and i are only bound in the 'seq' branch,
        # so this call raises NameError when reached — confirm intended input
        # (possibly `seeds` was meant here).
        EstimateInfluence(link_server, bfs_method, seeds_sets[i], res_fname, cores, output_mode)
def main():
    """Create a persistent EMR job flow and print its ID to stdout.

    Parses command-line options, configures logging (unless --quiet),
    builds an EMRJobRunner from the relevant options, and starts a
    persistent job flow.
    """
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # create the persistent job
    runner_kwargs = {
        'conf_path': options.conf_path,
        'ec2_instance_type': options.ec2_instance_type,
        'ec2_master_instance_type': options.ec2_master_instance_type,
        'ec2_slave_instance_type': options.ec2_slave_instance_type,
        'label': options.label,
        'num_ec2_instances': options.num_ec2_instances,
        'owner': options.owner,
    }
    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    # was a Python-2-only print statement; the call form works on 2 and 3
    print(emr_job_flow_id)
def test_hadoop_runner_option_store(self):
    """Deprecated Hadoop options are renamed, each with a warning."""
    captured = StringIO()
    with no_handlers_for_logger('mrjob.conf'):
        log_to_stream('mrjob.conf', captured)

        # HadoopRunnerOptionStore really wants to find the streaming jar
        with patch.object(mrjob.hadoop, 'find_hadoop_streaming_jar',
                          return_value='found'):
            opts = HadoopRunnerOptionStore(
                'hadoop',
                dict(base_tmp_dir='/scratch',
                     hadoop_home='required',
                     hdfs_scratch_dir='hdfs:///scratch'),
                [])

        warnings = captured.getvalue()

        self.assertEqual(opts['local_tmp_dir'], '/scratch')
        self.assertNotIn('base_tmp_dir', opts)
        self.assertIn('Deprecated option base_tmp_dir has been renamed'
                      ' to local_tmp_dir', warnings)

        self.assertEqual(opts['hadoop_tmp_dir'], 'hdfs:///scratch')
        self.assertNotIn('hdfs_scratch_dir', opts)
        self.assertIn('Deprecated option hdfs_scratch_dir has been renamed'
                      ' to hadoop_tmp_dir', warnings)
def test_deprecated_mapper_final_positional_arg(self):
    """Positional mapper_final is accepted with a warning, rejected twice."""
    def mapper(k, v):
        pass

    def reducer(k, v):
        pass

    def mapper_final():
        pass

    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', log_output)
        step = MRJob.mr(mapper, reducer, mapper_final)

        # should be allowed to specify mapper_final as a positional arg,
        # but we log a warning
        expected = MRJob.mr(
            mapper=mapper, reducer=reducer, mapper_final=mapper_final)
        self.assertEqual(step, expected)
        self.assertIn('mapper_final should be specified',
                      log_output.getvalue())

        # can't specify mapper_final as a positional and keyword arg
        self.assertRaises(
            TypeError, MRJob.mr,
            mapper, reducer, mapper_final, mapper_final=mapper_final)
def test_cleanup_options(self):
    """Deprecated cleanup constants map to their new names with warnings."""
    captured = StringIO()
    with no_handlers_for_logger('mrjob.runner'):
        log_to_stream('mrjob.runner', captured)
        opts = RunnerOptionStore(
            'inline',
            dict(cleanup=['LOCAL_SCRATCH', 'REMOTE_SCRATCH'],
                 cleanup_on_failure=['JOB_FLOW', 'SCRATCH']),
            [])

        warnings = captured.getvalue()

        self.assertEqual(opts['cleanup'], ['LOCAL_TMP', 'CLOUD_TMP'])
        self.assertIn(
            'Deprecated cleanup option LOCAL_SCRATCH has been renamed'
            ' to LOCAL_TMP', warnings)
        self.assertIn(
            'Deprecated cleanup option REMOTE_SCRATCH has been renamed'
            ' to CLOUD_TMP', warnings)

        self.assertEqual(opts['cleanup_on_failure'], ['CLUSTER', 'TMP'])
        self.assertIn(
            'Deprecated cleanup_on_failure option JOB_FLOW has been'
            ' renamed to CLUSTER', warnings)
        self.assertIn(
            'Deprecated cleanup_on_failure option SCRATCH has been renamed'
            ' to TMP', warnings)
def test_messy_error(self):
    """An unparseable COUNTERS field yields (None, None) and a warning."""
    counter_string = 'Job JOBID="_001" FAILED_REDUCES="0" COUNTERS="THIS IS NOT ACTUALLY A COUNTER"'
    with no_handlers_for_logger(''):
        captured = StringIO()
        log_to_stream('mrjob.parse', captured, level=logging.WARN)

        result = parse_hadoop_counters_from_line(counter_string)
        assert_equal((None, None), result)
        assert_in('Cannot parse Hadoop counter line', captured.getvalue())
def test_failed_job(self):
    """Step failure raises StepFailedException and tears the cluster down."""
    mr_job = MRTwoStepJob(['-r', 'dataproc', '-v'])
    mr_job.sandbox()

    with no_handlers_for_logger('mrjob.dataproc'):
        log_output = StringIO()
        log_to_stream('mrjob.dataproc', log_output)

        state_sequence = collections.deque(
            ['SETUP_DONE', 'RUNNING', 'ERROR'])
        self._dataproc_client.job_get_advances_states = state_sequence

        with mr_job.make_runner() as runner:
            self.assertIsInstance(runner, DataprocJobRunner)
            self.assertRaises(StepFailedException, runner.run)
            self.assertIn(' => ERROR\n', log_output.getvalue())
            cluster_id = runner.get_cluster_id()

    # job should get terminated
    clusters = self._dataproc_client._cache_clusters[_TEST_PROJECT]
    cluster_state = self._dataproc_client.get_state(clusters[cluster_id])
    self.assertEqual(cluster_state, 'DELETING')
def test_mixed_behavior_2(self):
    """Custom input_protocol() wins over the option, with a warning."""
    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', captured)
        job = self.MRInconsistentJob2()

        self.assertEqual(job.options.input_protocol, None)
        self.assertEqual(job.input_protocol().__class__, ReprProtocol)
        self.assertIn('custom behavior', captured.getvalue())
def test_mixed_behavior_2(self):
    """Overridden input_protocol() takes effect and logs a warning."""
    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', log_output)
        job = self.MRInconsistentJob2()

        assert_equal(job.options.input_protocol, None)
        assert_equal(job.input_protocol().__class__, ReprProtocol)
        assert_in('custom behavior', log_output.getvalue())
def assert_hadoop_version(self, JobClass, version_string):
    """Assert the job coerces hadoop_version and warns about non-strings."""
    job = JobClass()
    log_output = StringIO()
    with no_handlers_for_logger('mrjob.job'):
        log_to_stream('mrjob.job', log_output)
        self.assertEqual(job.jobconf()['hadoop_version'], version_string)
        self.assertIn('should be a string', log_output.getvalue())
def updated_and_warnings(self, jobconf, hadoop_version):
    """Return (updated copy of jobconf, warnings logged while updating)."""
    updated = jobconf.copy()
    with no_handlers_for_logger("mrjob.runner"):
        captured = StringIO()
        log_to_stream("mrjob.runner", captured)
        self.runner._update_jobconf_for_hadoop_version(
            updated, hadoop_version)
    return updated, captured.getvalue()
def test_default_protocols(self):
    """Default protocol options are set without deprecation warnings."""
    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', log_output)
        job = MRBoringJob()

        assert_equal(job.options.input_protocol, 'raw_value')
        assert_equal(job.options.protocol, 'json')
        assert_equal(job.options.output_protocol, 'json')
        assert_not_in('deprecated', log_output.getvalue())
def test_overriding_explicit_default_protocols(self):
    """--protocol overrides defaults and triggers a deprecation warning."""
    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', log_output)
        job = self.MRBoringJob2(args=['--protocol=json'])

        assert_equal(job.options.input_protocol, 'json')
        assert_equal(job.options.protocol, 'json')
        assert_equal(job.options.output_protocol, 'repr')
        assert_in('deprecated', log_output.getvalue())
def test_overriding_explicit_default_protocols(self):
    """Explicit --protocol is honored, and its use is flagged deprecated."""
    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', captured)
        job = self.MRBoringJob2(args=['--protocol=json'])

        self.assertEqual(job.options.input_protocol, 'json')
        self.assertEqual(job.options.protocol, 'json')
        self.assertEqual(job.options.output_protocol, 'repr')
        self.assertIn('deprecated', captured.getvalue())
def updated_and_warnings(self, jobconf, hadoop_version):
    """Apply the hadoop-version jobconf update; return (jobconf, warnings)."""
    working_copy = jobconf.copy()
    with no_handlers_for_logger('mrjob.runner'):
        log_output = StringIO()
        log_to_stream('mrjob.runner', log_output)
        self.runner._update_jobconf_for_hadoop_version(
            working_copy, hadoop_version)
    return working_copy, log_output.getvalue()
def test_default_protocols(self):
    """Stock MRBoringJob gets the default protocols, no warnings."""
    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', captured)
        job = MRBoringJob()

        self.assertEqual(job.options.input_protocol, 'raw_value')
        self.assertEqual(job.options.protocol, 'json')
        self.assertEqual(job.options.output_protocol, 'json')
        self.assertNotIn('deprecated', captured.getvalue())
def main():
    # Audit all recent EMR job flows, classify each as running/idle/done,
    # and hand the too-long-idle ones to terminate_and_notify().
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    emr_conn = EMRJobRunner().make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 weeks)')
    job_flows = emr_conn.describe_jobflows()

    now = datetime.utcnow()

    num_running = 0
    num_idle = 0
    num_done = 0
    # a list of tuples of job flow id, name, idle time (as a timedelta)
    to_terminate = []

    for jf in job_flows:
        # check if job flow is done
        if hasattr(jf, 'enddatetime'):
            num_done += 1
        # check if job flow is currently running
        elif jf.steps and not hasattr(jf.steps[-1], 'enddatetime'):
            num_running += 1
        # job flow is idle. how long?
        else:
            num_idle += 1
            # idle since its last step finished, or since creation if it
            # never ran a step
            if jf.steps:
                idle_since = datetime.strptime(
                    jf.steps[-1].enddatetime, ISO8601)
            else:
                idle_since = datetime.strptime(
                    jf.creationdatetime, ISO8601)
            idle_time = now - idle_since

            # don't care about fractions of a second
            idle_time = timedelta(idle_time.days, idle_time.seconds)

            log.debug('Job flow %s (%s) idle for %s' %
                      (jf.jobflowid, jf.name, idle_time))
            if idle_time > timedelta(hours=options.max_hours_idle):
                to_terminate.append(
                    (jf.jobflowid, jf.name, idle_time))

    log.info('Job flow statuses: %d running, %d idle, %d done' %
             (num_running, num_idle, num_done))

    terminate_and_notify(emr_conn, to_terminate, options)
def get_debug_printout(self, opt_store_class, alias, opts):
    """Construct an option store and return the debug log it produced."""
    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.runner', captured, debug=True)
        # debug printout happens in constructor
        opt_store_class(alias, opts, [])
    return captured.getvalue()
def test_option_debug_printout(self):
    """Runner construction at debug level prints option names and values."""
    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.runner', log_output, debug=True)
        InlineMRJobRunner(owner='dave')

        printed = log_output.getvalue()
        self.assertIn("'owner'", printed)
        self.assertIn("'dave'", printed)
def test_empty_runner_error(self):
    """A conf with no inline section logs 'No configs specified'."""
    conf = dict(runner=dict(local=dict(local_tmp_dir='/tmp')))
    path = self.save_conf('basic', conf)

    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.runner', captured)
        RunnerOptionStore('inline', {}, [path])

        self.assertEqual("No configs specified for inline runner\n",
                         captured.getvalue())
def test_empty_runner_error(self):
    """Missing runner config for 'inline' produces the expected message."""
    saved_path = self.save_conf(
        'basic', dict(runner=dict(local=dict(local_tmp_dir='/tmp'))))

    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.runner', log_output)
        RunnerOptionStore('inline', {}, [saved_path])

        self.assertEqual(
            "No configs specified for inline runner\n",
            log_output.getvalue())
def test_empty_runner_error(self):
    """Conf lacking an 'inline' runner section logs a warning naming it."""
    conf = dict(runner=dict(local=dict(base_tmp_dir='/tmp')))
    path = self.save_conf('basic', conf)

    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.conf', captured)
        runner = InlineMRJobRunner(conf_path=path)  # noqa: F841

        self.assertIn(
            "no configs for runner type 'inline' in %s" % path,
            captured.getvalue())
def test_cleanup_deprecated(self):
    """CLEANUP_DEFAULT still cleans the tmp dir but warns it's deprecated."""
    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob', log_output)

        with LocalMRJobRunner(cleanup=CLEANUP_DEFAULT) as runner:
            self.local_tmp_dir = runner._get_local_tmp_dir()
            assert os.path.exists(self.local_tmp_dir)

        # tmp dir should be gone once the runner exits
        assert_equal(os.path.exists(self.local_tmp_dir), False)
        self.local_tmp_dir = None

        assert_in('deprecated', log_output.getvalue())
def test_runner_option_store(self):
    """base_tmp_dir is renamed to local_tmp_dir with a deprecation warning."""
    log_output = StringIO()
    with no_handlers_for_logger('mrjob.conf'):
        log_to_stream('mrjob.conf', log_output)
        opts = RunnerOptionStore(
            'inline', dict(base_tmp_dir='/scratch'), [])

        self.assertEqual(opts['local_tmp_dir'], '/scratch')
        self.assertNotIn('base_tmp_dir', opts)
        self.assertIn('Deprecated option base_tmp_dir has been renamed'
                      ' to local_tmp_dir', log_output.getvalue())
def test_recurse(self):
    """A conf file that includes itself is detected and warned about."""
    path = os.path.join(self.tmp_dir, 'LOL.conf')
    with open(path, 'w') as f:
        dump_mrjob_conf(dict(include=path), f)

    captured = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.conf', captured)
        RunnerOptionStore('inline', {}, [path])

        self.assertIn('%s tries to recursively include %s!' % (path, path),
                      captured.getvalue())
def test_indentation_is_required(self):
    """Without a header, an unindented group swallows the counter line."""
    counter_lines = [
        "File System Counters",
        "  FILE: Number of bytes read=8",
    ]

    with no_handlers_for_logger("mrjob.logs.parse"):
        captured = StringIO()
        log_to_stream("mrjob.logs.parse", captured)

        # counter line is interpreted as group
        self.assertEqual(_parse_indented_counters(counter_lines), {})

        # should complain
        self.assertNotEqual(captured.getvalue(), "")
def test_recurse(self):
    """Self-including conf files are flagged when loaded via a runner."""
    path = os.path.join(self.tmp_dir, 'LOL.conf')
    with open(path, 'w') as f:
        dump_mrjob_conf(dict(include=path), f)

    log_output = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.conf', log_output)
        InlineMRJobRunner(conf_path=path)

        self.assertIn('%s tries to recursively include %s!' % (path, path),
                      log_output.getvalue())
def test_attrs_should_be_classes(self):
    """Protocol attrs given as instances still work, but each one warns."""
    with no_handlers_for_logger('mrjob.job'):
        captured = StringIO()
        log_to_stream('mrjob.job', captured)
        job = self.StrangeJob()

        self.assertIsInstance(job.input_protocol(), JSONProtocol)
        self.assertIsInstance(job.internal_protocol(), JSONProtocol)
        self.assertIsInstance(job.output_protocol(), JSONProtocol)

        warnings = captured.getvalue()
        self.assertIn('INPUT_PROTOCOL should be a class', warnings)
        self.assertIn('INTERNAL_PROTOCOL should be a class', warnings)
        self.assertIn('OUTPUT_PROTOCOL should be a class', warnings)
def test_with_header(self):
    """A 'Counters: N' header line parses cleanly with no warnings."""
    counter_lines = [
        "Counters: 1",
        "  File System Counters",
        "    FILE: Number of bytes read=86",
    ]

    with no_handlers_for_logger("mrjob.logs.parse"):
        captured = StringIO()
        log_to_stream("mrjob.logs.parse", captured)

        self.assertEqual(
            _parse_indented_counters(counter_lines),
            {"File System Counters": {"FILE: Number of bytes read": 86}},
        )

        # header shouldn't freak it out
        self.assertEqual(captured.getvalue(), "")
def _test_recoverable_error(self, ex):
    """ls() failures of type *ex* should warn and skip, not raise."""
    self.mock_paths = ['/path/to/logs/oak', ex]

    with no_handlers_for_logger('mrjob.logs.wrap'):
        captured = StringIO()
        log_to_stream('mrjob.logs.wrap', captured)

        self.assertEqual(self._ls_logs([['/path/to/logs']]),
                         [dict(path='/path/to/logs/oak')])
        self.mock_fs.ls.assert_called_once_with('/path/to/logs')
        self.assertIn("couldn't ls() /path/to/logs", captured.getvalue())
def test_passthrough(self):
    """fs methods pass through on the runner, with deprecation warnings."""
    runner = InlineMRJobRunner()

    with no_handlers_for_logger("mrjob.runner"):
        captured = StringIO()
        log_to_stream("mrjob.runner", captured)

        self.assertEqual(runner.ls, runner.fs.ls)
        # no special rules for underscore methods
        self.assertEqual(runner._cat_file, runner.fs._cat_file)

        warnings = captured.getvalue()
        self.assertIn("deprecated: call InlineMRJobRunner.fs.ls() directly",
                      warnings)
        self.assertIn(
            "deprecated: call InlineMRJobRunner.fs._cat_file() directly",
            warnings)
def main():
    # Entry point: parse CLI options, connect to the link server, and run
    # influence estimation for the requested BFS method.
    global output_file
    global total_steps_cap
    global csv_file
    global generate_database
    global edge_prob_type
    global sql_mode
    global seeds_file
    global probs
    parameters = parser.parse_args()
    # unpack everything from the parsed namespace in one shot
    csv_file, db_name, generate_database, edge_prob_type, sql_mode, res_fname, seeds, bfs_method, cores, output_mode, scale = (
        parameters.csv,
        parameters.dataset,
        parameters.generate_database,
        parameters.edge_prob_type,
        parameters.sql,
        parameters.res_fname,
        parameters.seeds,
        parameters.bfs_method,
        parameters.cores,
        parameters.output_mode,
        parameters.scale,
    )
    if bfs_method == "emr":
        # EMR mode needs mrjob logging configured before anything runs
        print_out("setting up logging", output_mode)
        log_to_stream()
        print_out("done", output_mode)
    print_out("Evaluation algorithm. Dataset: %s" % (db_name), output_mode)
    link_server = LinkServerCP(db_name, undirected=parameters.undirected)
    if bfs_method == "seq":
        # sequential mode: unpickle the seed sets and estimate each one
        print_out("Loading seeds set", output_mode)
        seeds_sets = cp.load(open(seeds, "r"))
        for i in xrange(len(seeds_sets)):
            EstimateInfluence(
                link_server,
                bfs_method,
                parameters.tau_scale,
                seeds_sets[i],
                res_fname,
                cores,
                output_mode,
                scale,
                parameters.output_results,
                init_tau=parameters.init_tau,
                iter_samples=parameters.iter_samples,
            )
    else:
        # NOTE(review): seeds_sets and i are undefined on this path (they
        # are only assigned in the "seq" branch), so this raises NameError
        # when reached — confirm what seed input was intended here.
        EstimateInfluence(link_server, bfs_method, seeds_sets[i], res_fname, cores, output_mode)
def test_pass_through_fields(self):
    """Field access (not calls) through the runner warns differently."""
    # TODO: currently can't initialize HadoopRunner without setting these
    runner = HadoopJobRunner(
        hadoop_bin="hadoooooooooop",
        hadoop_home="kansas",
        hadoop_streaming_jar="streaming.jar",
    )

    with no_handlers_for_logger("mrjob.runner"):
        captured = StringIO()
        log_to_stream("mrjob.runner", captured)

        self.assertEqual(runner._hadoop_bin, runner.fs._hadoop_bin)

        # deprecation warning is different for non-functions
        self.assertIn(
            "deprecated: access HadoopJobRunner.fs._hadoop_bin directly",
            captured.getvalue())
def test_io_error(self):
    """An IOError from ls() yields no logs and a warning, not a crash."""
    self.mock_paths = [IOError()]

    with no_handlers_for_logger('mrjob.logs.ls'):
        captured = StringIO()
        log_to_stream('mrjob.logs.ls', captured)

        self.assertEqual(list(_ls_logs(self.mock_fs, '/path/to/logs')), [])
        self.mock_fs.ls.assert_called_once_with('/path/to/logs')
        self.assertIn("couldn't ls() /path/to/logs", captured.getvalue())
def test_deprecated_alias(self):
    """The old buffer_iterator_to_line_iterator name still works + warns."""
    with no_handlers_for_logger('mrjob.util'):
        captured = StringIO()
        log_to_stream('mrjob.util', captured)

        chunks = [b'The quick\nbrown fox\nju',
                  b'mped over\nthe lazy\ndog',
                  b's.\n']
        expected = [b'The quick\n', b'brown fox\n', b'jumped over\n',
                    b'the lazy\n', b'dogs.\n']

        self.assertEqual(
            list(buffer_iterator_to_line_iterator(
                chunk for chunk in chunks)),
            expected)

        self.assertIn('has been renamed', captured.getvalue())
def test_deprecated_alias(self):
    """Deprecated alias splits lines correctly and logs a rename warning."""
    with no_handlers_for_logger('mrjob.util'):
        log_output = StringIO()
        log_to_stream('mrjob.util', log_output)

        source = [b'The quick\nbrown fox\njumped over\nthe lazy\ndogs.\n']
        lines = list(buffer_iterator_to_line_iterator(
            chunk for chunk in source))

        self.assertEqual(
            lines,
            [b'The quick\n', b'brown fox\n', b'jumped over\n',
             b'the lazy\n', b'dogs.\n'])
        self.assertIn('has been renamed', log_output.getvalue())
def test_indentation_is_required(self):
    """An unindented first line is read as a group, and we complain."""
    counter_lines = [
        'File System Counters',
        '    FILE: Number of bytes read=8',
    ]

    with no_handlers_for_logger('mrjob.logs.step'):
        captured = StringIO()
        log_to_stream('mrjob.logs.step', captured)

        # counter line is interpreted as group
        self.assertEqual(_parse_indented_counters(counter_lines), {})

        # should complain
        self.assertNotEqual(captured.getvalue(), '')
def main(): # parser command-line args option_parser = make_option_parser() options, args = option_parser.parse_args() if args: option_parser.error('takes no arguments') # set up logging if not options.quiet: log_to_stream(name='mrjob', debug=options.verbose) # suppress No handlers could be found for logger "boto" message log_to_stream(name='boto', level=logging.CRITICAL) print_report(options)
def set_up_logging(cls, quiet=False, verbose=False, stream=None):
    """Set up logging when running from the command line. This is also
    used by the various command-line utilities.

    :param bool quiet: If true, don't log. Overrides *verbose*.
    :param bool verbose: If true, set log level to ``DEBUG`` (default is
                         ``INFO``)
    :param stream: File-like object to log to (default is ``sys.stderr``)
    """
    if quiet:
        # silence both the library's and the job script's loggers
        log_to_null(name='mrjob')
        log_to_null(name='__main__')
    else:
        log_to_stream(name='mrjob', debug=verbose, stream=stream)
        log_to_stream(name='__main__', debug=verbose, stream=stream)
def test_pass_through_fields(self):
    """Accessing an fs field via the runner emits the field-style warning."""
    # TODO: currently can't initialize HadoopRunner without setting these
    runner = HadoopJobRunner(hadoop_bin='hadoooooooooop',
                             hadoop_home='kansas',
                             hadoop_streaming_jar='streaming.jar')

    with no_handlers_for_logger('mrjob.runner'):
        log_output = StringIO()
        log_to_stream('mrjob.runner', log_output)

        self.assertEqual(runner._hadoop_bin, runner.fs._hadoop_bin)

        # deprecation warning is different for non-functions
        self.assertIn(
            'deprecated: access HadoopJobRunner.fs._hadoop_bin directly',
            log_output.getvalue())
def test_passthrough(self):
    """Runner delegates fs methods (even underscored ones) and warns."""
    runner = InlineMRJobRunner()

    with no_handlers_for_logger('mrjob.runner'):
        log_output = StringIO()
        log_to_stream('mrjob.runner', log_output)

        self.assertEqual(runner.ls, runner.fs.ls)
        # no special rules for underscore methods
        self.assertEqual(runner._cat_file, runner.fs._cat_file)

        warnings = log_output.getvalue()
        self.assertIn(
            'deprecated: call InlineMRJobRunner.fs.ls() directly',
            warnings)
        self.assertIn(
            'deprecated: call InlineMRJobRunner.fs._cat_file() directly',
            warnings)
def main():
    """Parse options, set up logging, then terminate idle job flows."""
    parser = make_option_parser()
    options, args = parser.parse_args()
    if args:
        parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # suppress No handlers could be found for logger "boto" message
    log_to_stream(name='boto', level=logging.CRITICAL)

    inspect_and_maybe_terminate_job_flows(
        conf_path=options.conf_path,
        max_hours_idle=options.max_hours_idle,
        now=datetime.utcnow(),
        dry_run=options.dry_run)
def set_up_logging(cls, quiet=False, verbose=False, stream=None):
    """Set up logging when running from the command line. This is also
    used by the various command-line utilities.

    :param bool quiet: If true, don't log. Overrides *verbose*.
    :param bool verbose: If true, set log level to ``DEBUG`` (default is
                         ``INFO``)
    :param stream: File-like object to log to (default is ``sys.stderr``)

    This will also set up a null log handler for boto3, so we don't get
    warnings if boto3 tries to log about throttling and whatnot.
    """
    if quiet:
        # silence both the library's and the job script's loggers
        log_to_null(name='mrjob')
        log_to_null(name='__main__')
    else:
        log_to_stream(name='mrjob', debug=verbose, stream=stream)
        log_to_stream(name='__main__', debug=verbose, stream=stream)