コード例 #1
0
    def test_non_log_lines(self):
        """Leading non-log lines are skipped (one warning each) and a
        trailing non-log line is folded into the previous message."""
        lines = StringIO('foo\n'
                         'bar\n'
                         '15/12/11 13:26:08 ERROR streaming.StreamJob:'
                         ' Error Launching job :'
                         ' Output directory already exists\n'
                         'Streaming Command Failed!')

        with no_handlers_for_logger('mrjob.logs.parse'):
            # capture the parser's own logging so we can count warnings
            stderr = StringIO()
            log_to_stream('mrjob.logs.parse', stderr)

            self.assertEqual(
                list(_parse_hadoop_log_lines(lines)),
                [
                    # ignore leading non-log lines
                    dict(
                        timestamp='15/12/11 13:26:08',
                        level='ERROR',
                        logger='streaming.StreamJob',
                        thread=None,
                        # no way to know that Streaming Command Failed! wasn't part
                        # of a multi-line message
                        message=('Error Launching job :'
                                 ' Output directory already exists\n'
                                 'Streaming Command Failed!'))
                ])

            # should be one warning for each leading non-log line
            log_lines = stderr.getvalue().splitlines()
            self.assertEqual(len(log_lines), 2)
コード例 #2
0
ファイル: test_job.py プロジェクト: AnthonyNystrom/mrjob
    def test_deprecated_mapper_final_positional_arg(self):
        """Passing mapper_final positionally to MRJob.mr() still works but
        warns; passing it both positionally and by keyword raises."""
        def mapper(k, v):
            pass

        def reducer(k, v):
            pass

        def mapper_final():
            pass

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.job', stderr)
            step = MRJob.mr(mapper, reducer, mapper_final)

        # should be allowed to specify mapper_final as a positional arg,
        # but we log a warning
        self.assertEqual(step, MRJob.mr(mapper=mapper,
                                        reducer=reducer,
                                        mapper_final=mapper_final))
        self.assertIn('mapper_final should be specified', stderr.getvalue())

        # can't specify mapper_final as a positional and keyword arg
        self.assertRaises(
            TypeError,
            MRJob.mr, mapper, reducer, mapper_final, mapper_final=mapper_final)
コード例 #3
0
ファイル: test_parse.py プロジェクト: BeeswaxIO/mrjob
    def test_non_log_lines(self):
        """Leading non-log lines are skipped (one warning each) and a
        trailing non-log line is folded into the previous message."""
        lines = StringIO(
            "foo\n"
            "bar\n"
            "15/12/11 13:26:08 ERROR streaming.StreamJob:"
            " Error Launching job :"
            " Output directory already exists\n"
            "Streaming Command Failed!"
        )

        with no_handlers_for_logger("mrjob.logs.parse"):
            # capture the parser's own logging so we can count warnings
            stderr = StringIO()
            log_to_stream("mrjob.logs.parse", stderr)

            self.assertEqual(
                list(_parse_hadoop_log_lines(lines)),
                [
                    # ignore leading non-log lines
                    dict(
                        timestamp="15/12/11 13:26:08",
                        level="ERROR",
                        logger="streaming.StreamJob",
                        thread=None,
                        # no way to know that Streaming Command Failed! wasn't part
                        # of a multi-line message
                        message=(
                            "Error Launching job :" " Output directory already exists\n" "Streaming Command Failed!"
                        ),
                    )
                ],
            )

            # should be one warning for each leading non-log line
            log_lines = stderr.getvalue().splitlines()
            self.assertEqual(len(log_lines), 2)
コード例 #4
0
ファイル: test_dataproc.py プロジェクト: okomestudio/mrjob
    def test_failed_job(self):
        """A job whose Dataproc state reaches ERROR raises
        StepFailedException, and its cluster still gets torn down."""
        mr_job = MRTwoStepJob(['-r', 'dataproc', '-v'])
        mr_job.sandbox()

        with no_handlers_for_logger('mrjob.dataproc'):
            stderr = StringIO()
            log_to_stream('mrjob.dataproc', stderr)

            # drive the mock job through these states on successive polls
            self._dataproc_client.job_get_advances_states = (
                collections.deque(['SETUP_DONE', 'RUNNING', 'ERROR']))

            with mr_job.make_runner() as runner:
                self.assertIsInstance(runner, DataprocJobRunner)

                self.assertRaises(StepFailedException, runner.run)

                # the state transition should have been logged
                self.assertIn(' => ERROR\n', stderr.getvalue())

                cluster_id = runner.get_cluster_id()

        # job should get terminated
        cluster = (
            self._dataproc_client._cache_clusters[_TEST_PROJECT][cluster_id])
        cluster_state = self._dataproc_client.get_state(cluster)
        self.assertEqual(cluster_state, 'DELETING')
コード例 #5
0
ファイル: test_job.py プロジェクト: ndimiduk/mrjob
 def assert_hadoop_version(self, JobClass, version_string):
     """Check that *JobClass*'s jobconf() reports *version_string* as
     hadoop_version and that a 'should be a string' warning is logged."""
     job = JobClass()
     captured = StringIO()
     with no_handlers_for_logger("mrjob.job"):
         log_to_stream("mrjob.job", captured)
         jobconf = job.jobconf()
         self.assertEqual(jobconf["hadoop_version"], version_string)
         self.assertIn("should be a string", captured.getvalue())
コード例 #6
0
ファイル: ic_bfs_eval.py プロジェクト: zshwuhan/ic-eval
def main():
    """Entry point for the IC influence-evaluation script (Python 2:
    uses xrange and the parser/print_out helpers defined elsewhere)."""
    global output_file
    global total_steps_cap
    global csv_file
    global generate_database
    global edge_prob_type
    global sql_mode
    global seeds_file
    global probs

    parameters = parser.parse_args()
    csv_file, db_name, generate_database, edge_prob_type, sql_mode, res_fname, seeds, bfs_method, cores, output_mode, scale = parameters.csv, parameters.dataset, \
        parameters.generate_database, parameters.edge_prob_type, parameters.sql, parameters.res_fname, parameters.seeds, parameters.bfs_method, parameters.cores, parameters.output_mode, parameters.scale

    # EMR runs want mrjob-style logging set up first
    if bfs_method == 'emr':
        print_out("setting up logging", output_mode)
        log_to_stream()
        print_out("done", output_mode)
    print_out('Evaluation algorithm. Dataset: %s' % (db_name), output_mode)
    link_server = LinkServerCP(db_name, undirected=parameters.undirected)
    if bfs_method == 'seq':
        print_out('Loading seeds set', output_mode)
        # seeds file is a pickled list of seed sets
        seeds_sets = cp.load(open(seeds, 'r'))

        for i in xrange(len(seeds_sets)):
            EstimateInfluence(link_server, bfs_method, parameters.tau_scale, seeds_sets[i], res_fname,\
                               cores, output_mode,scale, parameters.output_results,\
                               init_tau = parameters.init_tau, iter_samples = parameters.iter_samples)
    else:
        # NOTE(review): seeds_sets and i are only bound in the 'seq' branch
        # above; this call would raise NameError if reached — confirm the
        # intended arguments for the non-'seq' path
        EstimateInfluence(link_server, bfs_method, seeds_sets[i], res_fname,
                          cores, output_mode)
コード例 #7
0
def main():
    """Create a persistent EMR job flow from command-line options and
    print its id (Python 2: bare print statement)."""
    # parser command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # create the persistent job
    runner_kwargs = {
        'conf_path': options.conf_path,
        'ec2_instance_type': options.ec2_instance_type,
        'ec2_master_instance_type': options.ec2_master_instance_type,
        'ec2_slave_instance_type': options.ec2_slave_instance_type,
        'label': options.label,
        'num_ec2_instances': options.num_ec2_instances,
        'owner': options.owner,
    }
    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    print emr_job_flow_id
コード例 #8
0
ファイル: test_parse.py プロジェクト: sebratt/mrjob
    def test_non_log_lines(self):
        """Leading non-log lines are skipped (one warning each) and a
        trailing non-log line is folded into the previous message."""
        lines = StringIO('foo\n'
                         'bar\n'
                         '15/12/11 13:26:08 ERROR streaming.StreamJob:'
                         ' Error Launching job :'
                         ' Output directory already exists\n'
                         'Streaming Command Failed!')

        with no_handlers_for_logger('mrjob.logs.parse'):
            # capture the parser's own logging so we can count warnings
            stderr = StringIO()
            log_to_stream('mrjob.logs.parse', stderr)

            self.assertEqual(
            list(_parse_hadoop_log_lines(lines)), [
                # ignore leading non-log lines
                dict(
                    timestamp='15/12/11 13:26:08',
                    level='ERROR',
                    logger='streaming.StreamJob',
                    thread=None,
                    # no way to know that Streaming Command Failed! wasn't part
                    # of a multi-line message
                    message=('Error Launching job :'
                             ' Output directory already exists\n'
                             'Streaming Command Failed!'))
            ])

            # should be one warning for each leading non-log line
            log_lines = stderr.getvalue().splitlines()
            self.assertEqual(len(log_lines), 2)
コード例 #9
0
    def test_hadoop_runner_option_store(self):
        """Deprecated hadoop options are renamed (base_tmp_dir ->
        local_tmp_dir, hdfs_scratch_dir -> hadoop_tmp_dir) with warnings."""
        stderr = StringIO()
        with no_handlers_for_logger('mrjob.conf'):
            log_to_stream('mrjob.conf', stderr)

            # HadoopRunnerOptionStore really wants to find the streaming jar
            with patch.object(mrjob.hadoop,
                              'find_hadoop_streaming_jar',
                              return_value='found'):
                opts = HadoopRunnerOptionStore(
                    'hadoop',
                    dict(base_tmp_dir='/scratch',
                         hadoop_home='required',
                         hdfs_scratch_dir='hdfs:///scratch'), [])

            self.assertEqual(opts['local_tmp_dir'], '/scratch')
            self.assertNotIn('base_tmp_dir', opts)
            self.assertIn(
                'Deprecated option base_tmp_dir has been renamed'
                ' to local_tmp_dir', stderr.getvalue())

            self.assertEqual(opts['hadoop_tmp_dir'], 'hdfs:///scratch')
            self.assertNotIn('hdfs_scratch_dir', opts)
            self.assertIn(
                'Deprecated option hdfs_scratch_dir has been renamed'
                ' to hadoop_tmp_dir', stderr.getvalue())
コード例 #10
0
ファイル: test_job.py プロジェクト: bchess/mrjob
    def test_deprecated_mapper_final_positional_arg(self):
        """Passing mapper_final positionally to MRJob.mr() still works but
        warns; passing it both positionally and by keyword raises."""
        def mapper(k, v):
            pass

        def reducer(k, v):
            pass

        def mapper_final():
            pass

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.job', stderr)
            step = MRJob.mr(mapper, reducer, mapper_final)

        # should be allowed to specify mapper_final as a positional arg,
        # but we log a warning
        self.assertEqual(
            step,
            MRJob.mr(
                mapper=mapper, reducer=reducer, mapper_final=mapper_final))
        self.assertIn('mapper_final should be specified', stderr.getvalue())

        # can't specify mapper_final as a positional and keyword arg
        self.assertRaises(
            TypeError,
            MRJob.mr,
            mapper,
            reducer,
            mapper_final,
            mapper_final=mapper_final)
コード例 #11
0
ファイル: test_option_store.py プロジェクト: irskep/mrjob
    def test_cleanup_options(self):
        """Deprecated cleanup/cleanup_on_failure values are renamed to their
        modern equivalents, each with a warning."""
        stderr = StringIO()
        with no_handlers_for_logger('mrjob.runner'):
            log_to_stream('mrjob.runner', stderr)
            opts = RunnerOptionStore(
                'inline',
                dict(cleanup=['LOCAL_SCRATCH', 'REMOTE_SCRATCH'],
                     cleanup_on_failure=['JOB_FLOW', 'SCRATCH']),
                [])

            self.assertEqual(opts['cleanup'], ['LOCAL_TMP', 'CLOUD_TMP'])
            self.assertIn(
                'Deprecated cleanup option LOCAL_SCRATCH has been renamed'
                ' to LOCAL_TMP', stderr.getvalue())
            self.assertIn(
                'Deprecated cleanup option REMOTE_SCRATCH has been renamed'
                ' to CLOUD_TMP', stderr.getvalue())

            self.assertEqual(opts['cleanup_on_failure'], ['CLUSTER', 'TMP'])
            self.assertIn(
                'Deprecated cleanup_on_failure option JOB_FLOW has been'
                ' renamed to CLUSTER', stderr.getvalue())
            self.assertIn(
                'Deprecated cleanup_on_failure option SCRATCH has been renamed'
                ' to TMP', stderr.getvalue())
コード例 #12
0
ファイル: parse_test.py プロジェクト: gimlids/LTPM
 def test_messy_error(self):
     """A COUNTERS field that isn't in counter format parses to
     (None, None) and logs a warning."""
     counter_string = 'Job JOBID="_001" FAILED_REDUCES="0" COUNTERS="THIS IS NOT ACTUALLY A COUNTER"'
     with no_handlers_for_logger(''):
         stderr = StringIO()
         log_to_stream('mrjob.parse', stderr, level=logging.WARN)
         result = parse_hadoop_counters_from_line(counter_string)
         assert_equal(result, (None, None))
         assert_in('Cannot parse Hadoop counter line', stderr.getvalue())
コード例 #13
0
ファイル: create_job_flow.py プロジェクト: chomp/mrjob
def main():
    """Create a persistent EMR job flow from command-line options and
    print its id (Python 2: bare print statement)."""
    # parser command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # create the persistent job
    runner_kwargs = {
        'conf_path': options.conf_path,
        'ec2_instance_type': options.ec2_instance_type,
        'ec2_master_instance_type': options.ec2_master_instance_type,
        'ec2_slave_instance_type': options.ec2_slave_instance_type,
        'label': options.label,
        'num_ec2_instances': options.num_ec2_instances,
        'owner': options.owner,
    }
    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    print emr_job_flow_id
コード例 #14
0
ファイル: test_dataproc.py プロジェクト: rpmirish12/Learning
    def test_failed_job(self):
        """A job whose Dataproc state reaches ERROR raises
        StepFailedException, and its cluster still gets torn down."""
        mr_job = MRTwoStepJob(['-r', 'dataproc', '-v'])
        mr_job.sandbox()

        with no_handlers_for_logger('mrjob.dataproc'):
            stderr = StringIO()
            log_to_stream('mrjob.dataproc', stderr)

            # drive the mock job through these states on successive polls
            self._dataproc_client.job_get_advances_states = (collections.deque(
                ['SETUP_DONE', 'RUNNING', 'ERROR']))

            with mr_job.make_runner() as runner:
                self.assertIsInstance(runner, DataprocJobRunner)

                self.assertRaises(StepFailedException, runner.run)

                # the state transition should have been logged
                self.assertIn(' => ERROR\n', stderr.getvalue())

                cluster_id = runner.get_cluster_id()

        # job should get terminated
        cluster = (
            self._dataproc_client._cache_clusters[_TEST_PROJECT][cluster_id])
        cluster_state = self._dataproc_client.get_state(cluster)
        self.assertEqual(cluster_state, 'DELETING')
コード例 #15
0
ファイル: test_option_store.py プロジェクト: gaker/mrjob
    def test_cleanup_options(self):
        """Deprecated cleanup/cleanup_on_failure values are renamed to their
        modern equivalents, each with a warning."""
        stderr = StringIO()
        with no_handlers_for_logger('mrjob.runner'):
            log_to_stream('mrjob.runner', stderr)
            opts = RunnerOptionStore(
                'inline',
                dict(cleanup=['LOCAL_SCRATCH', 'REMOTE_SCRATCH'],
                     cleanup_on_failure=['JOB_FLOW', 'SCRATCH']),
                [])

            self.assertEqual(opts['cleanup'], ['LOCAL_TMP', 'CLOUD_TMP'])
            self.assertIn(
                'Deprecated cleanup option LOCAL_SCRATCH has been renamed'
                ' to LOCAL_TMP', stderr.getvalue())
            self.assertIn(
                'Deprecated cleanup option REMOTE_SCRATCH has been renamed'
                ' to CLOUD_TMP', stderr.getvalue())

            self.assertEqual(opts['cleanup_on_failure'], ['CLUSTER', 'TMP'])
            self.assertIn(
                'Deprecated cleanup_on_failure option JOB_FLOW has been'
                ' renamed to CLUSTER', stderr.getvalue())
            self.assertIn(
                'Deprecated cleanup_on_failure option SCRATCH has been renamed'
                ' to TMP', stderr.getvalue())
コード例 #16
0
ファイル: test_job.py プロジェクト: bchess/mrjob
 def test_mixed_behavior_2(self):
     """MRInconsistentJob2 has no input_protocol option, its custom
     input_protocol() method wins, and a warning is logged."""
     log_output = StringIO()
     with no_handlers_for_logger():
         log_to_stream('mrjob.job', log_output)
         job = self.MRInconsistentJob2()
         self.assertEqual(job.options.input_protocol, None)
         self.assertEqual(job.input_protocol().__class__, ReprProtocol)
         self.assertIn('custom behavior', log_output.getvalue())
コード例 #17
0
ファイル: job_test.py プロジェクト: bopopescu/LTPM
 def test_mixed_behavior_2(self):
     """MRInconsistentJob2 has no input_protocol option, its custom
     input_protocol() method wins, and a warning is logged."""
     stderr = StringIO()
     with no_handlers_for_logger():
         log_to_stream('mrjob.job', stderr)
         mr_job = self.MRInconsistentJob2()
         assert_equal(mr_job.options.input_protocol, None)
         assert_equal(mr_job.input_protocol().__class__, ReprProtocol)
         assert_in('custom behavior', stderr.getvalue())
コード例 #18
0
 def test_messy_error(self):
     """A COUNTERS field that isn't in counter format parses to
     (None, None) and logs a warning."""
     counter_string = 'Job JOBID="_001" FAILED_REDUCES="0" COUNTERS="THIS IS NOT ACTUALLY A COUNTER"'
     with no_handlers_for_logger(''):
         stderr = StringIO()
         log_to_stream('mrjob.parse', stderr, level=logging.WARN)
         assert_equal((None, None),
                      parse_hadoop_counters_from_line(counter_string))
         assert_in('Cannot parse Hadoop counter line', stderr.getvalue())
コード例 #19
0
 def assert_hadoop_version(self, JobClass, version_string):
     """Check that *JobClass*'s jobconf() reports *version_string* as
     hadoop_version and that a 'should be a string' warning is logged."""
     mr_job = JobClass()
     mock_log = StringIO()
     with no_handlers_for_logger('mrjob.job'):
         log_to_stream('mrjob.job', mock_log)
         self.assertEqual(mr_job.jobconf()['hadoop_version'],
                          version_string)
         self.assertIn('should be a string', mock_log.getvalue())
コード例 #20
0
ファイル: test_runner.py プロジェクト: irskep/mrjob
    def updated_and_warnings(self, jobconf, hadoop_version):
        """Run _update_jobconf_for_hadoop_version() on a copy of *jobconf*
        and return (updated jobconf, captured warning text)."""
        jobconf = jobconf.copy()
        captured = StringIO()
        with no_handlers_for_logger("mrjob.runner"):
            log_to_stream("mrjob.runner", captured)
            self.runner._update_jobconf_for_hadoop_version(
                jobconf, hadoop_version)
        return jobconf, captured.getvalue()
コード例 #21
0
ファイル: job_test.py プロジェクト: bopopescu/LTPM
 def test_default_protocols(self):
     """MRBoringJob uses the stock protocol options and triggers no
     deprecation warning."""
     stderr = StringIO()
     with no_handlers_for_logger():
         log_to_stream('mrjob.job', stderr)
         mr_job = MRBoringJob()
         assert_equal(mr_job.options.input_protocol, 'raw_value')
         assert_equal(mr_job.options.protocol, 'json')
         assert_equal(mr_job.options.output_protocol, 'json')
         assert_not_in('deprecated', stderr.getvalue())
コード例 #22
0
ファイル: job_test.py プロジェクト: bopopescu/LTPM
 def test_overriding_explicit_default_protocols(self):
     """--protocol=json overrides the job's explicit defaults, and using
     the deprecated protocol switches logs a warning."""
     stderr = StringIO()
     with no_handlers_for_logger():
         log_to_stream('mrjob.job', stderr)
         mr_job = self.MRBoringJob2(args=['--protocol=json'])
         assert_equal(mr_job.options.input_protocol, 'json')
         assert_equal(mr_job.options.protocol, 'json')
         assert_equal(mr_job.options.output_protocol, 'repr')
         assert_in('deprecated', stderr.getvalue())
コード例 #23
0
ファイル: test_job.py プロジェクト: bchess/mrjob
 def test_overriding_explicit_default_protocols(self):
     """--protocol=json overrides the job's explicit defaults, and using
     the deprecated protocol switches logs a warning."""
     log_output = StringIO()
     with no_handlers_for_logger():
         log_to_stream('mrjob.job', log_output)
         job = self.MRBoringJob2(args=['--protocol=json'])
         self.assertEqual(job.options.input_protocol, 'json')
         self.assertEqual(job.options.protocol, 'json')
         self.assertEqual(job.options.output_protocol, 'repr')
         self.assertIn('deprecated', log_output.getvalue())
コード例 #24
0
ファイル: test_runner.py プロジェクト: Milkigit/mrjob
    def updated_and_warnings(self, jobconf, hadoop_version):
        """Run _update_jobconf_for_hadoop_version() on a copy of *jobconf*
        and return (updated jobconf, captured warning text)."""
        jobconf = jobconf.copy()
        with no_handlers_for_logger('mrjob.runner'):
            stderr = StringIO()
            log_to_stream('mrjob.runner', stderr)
            self.runner._update_jobconf_for_hadoop_version(
                jobconf, hadoop_version)

        return jobconf, stderr.getvalue()
コード例 #25
0
ファイル: test_job.py プロジェクト: bchess/mrjob
 def test_default_protocols(self):
     """MRBoringJob uses the stock protocol options and triggers no
     deprecation warning."""
     log_output = StringIO()
     with no_handlers_for_logger():
         log_to_stream('mrjob.job', log_output)
         job = MRBoringJob()
         self.assertEqual(job.options.input_protocol, 'raw_value')
         self.assertEqual(job.options.protocol, 'json')
         self.assertEqual(job.options.output_protocol, 'json')
         self.assertNotIn('deprecated', log_output.getvalue())
コード例 #26
0
def main():
    """Scan all recent EMR job flows, classify them as running/idle/done,
    and terminate (via terminate_and_notify) those idle longer than
    --max-hours-idle."""
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    emr_conn = EMRJobRunner().make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 weeks)')
    job_flows = emr_conn.describe_jobflows()

    now = datetime.utcnow()

    num_running = 0
    num_idle = 0
    num_done = 0
    # a list of tuples of job flow id, name, idle time (as a timedelta)
    to_terminate = []

    for jf in job_flows:
        # check if job flow is done
        if hasattr(jf, 'enddatetime'):
            num_done += 1
        # check if job flow is currently running
        elif jf.steps and not hasattr(jf.steps[-1], 'enddatetime'):
            num_running += 1
        # job flow is idle. how long?
        else:
            num_idle += 1
            # idle since its last step ended, or since creation if no steps
            if jf.steps:
                idle_since = datetime.strptime(
                    jf.steps[-1].enddatetime, ISO8601)
            else:
                idle_since = datetime.strptime(
                    jf.creationdatetime, ISO8601)
            idle_time = now - idle_since

            # don't care about fractions of a second
            idle_time = timedelta(idle_time.days, idle_time.seconds)

            log.debug('Job flow %s (%s) idle for %s' %
                           (jf.jobflowid, jf.name, idle_time))
            if idle_time > timedelta(hours=options.max_hours_idle):
                to_terminate.append(
                    (jf.jobflowid, jf.name, idle_time))

    log.info('Job flow statuses: %d running, %d idle, %d done' %
                  (num_running, num_idle, num_done))

    terminate_and_notify(emr_conn, to_terminate, options)
コード例 #27
0
    def get_debug_printout(self, opt_store_class, alias, opts):
        """Construct *opt_store_class* with debug logging enabled and
        return whatever it logged during construction."""
        stderr = StringIO()

        with no_handlers_for_logger():
            log_to_stream('mrjob.runner', stderr, debug=True)

            # debug printout happens in constructor
            opt_store_class(alias, opts, [])

        return stderr.getvalue()
コード例 #28
0
ファイル: test_runner.py プロジェクト: shashankn91/mrjob
    def test_option_debug_printout(self):
        """With debug logging enabled, option names and their values show
        up in the runner's log output."""
        log_output = StringIO()

        with no_handlers_for_logger():
            log_to_stream('mrjob.runner', log_output, debug=True)
            InlineMRJobRunner(owner='dave')

        printout = log_output.getvalue()
        self.assertIn("'owner'", printout)
        self.assertIn("'dave'", printout)
コード例 #29
0
    def test_empty_runner_error(self):
        """A conf file with no section for the chosen runner logs a
        'No configs specified' message."""
        conf = dict(runner=dict(local=dict(local_tmp_dir='/tmp')))
        path = self.save_conf('basic', conf)

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.runner', stderr)
            RunnerOptionStore('inline', {}, [path])
            self.assertEqual("No configs specified for inline runner\n",
                             stderr.getvalue())
コード例 #30
0
ファイル: test_runner.py プロジェクト: okomestudio/mrjob
    def test_option_debug_printout(self):
        """With debug logging enabled, option names and their values show
        up in the runner's log output."""
        stderr = StringIO()

        with no_handlers_for_logger():
            log_to_stream('mrjob.runner', stderr, debug=True)

            InlineMRJobRunner(owner='dave')

        self.assertIn("'owner'", stderr.getvalue())
        self.assertIn("'dave'", stderr.getvalue())
コード例 #31
0
    def get_debug_printout(self, opt_store_class, alias, opts):
        """Construct *opt_store_class* with debug logging enabled and
        return whatever it logged during construction."""
        log_output = StringIO()

        with no_handlers_for_logger():
            log_to_stream('mrjob.runner', log_output, debug=True)
            # the debug printout is emitted by the constructor itself
            opt_store_class(alias, opts, [])

        return log_output.getvalue()
コード例 #32
0
ファイル: test_option_store.py プロジェクト: irskep/mrjob
    def test_empty_runner_error(self):
        """A conf file with no section for the chosen runner logs a
        'No configs specified' message."""
        conf = dict(runner=dict(local=dict(local_tmp_dir='/tmp')))
        path = self.save_conf('basic', conf)

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.runner', stderr)
            RunnerOptionStore('inline', {}, [path])
            self.assertEqual(
                "No configs specified for inline runner\n",
                stderr.getvalue())
コード例 #33
0
ファイル: test_runner.py プロジェクト: BrandonHaynes/mrjob
    def test_empty_runner_error(self):
        """A conf file with no 'inline' runner section logs a 'no configs
        for runner type' message naming the file."""
        conf = dict(runner=dict(local=dict(base_tmp_dir='/tmp')))
        path = self.save_conf('basic', conf)

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.conf', stderr)
            runner = InlineMRJobRunner(conf_path=path)
            self.assertIn(
                "no configs for runner type 'inline' in %s" % path,
                stderr.getvalue())
コード例 #34
0
    def test_cleanup_deprecated(self):
        """CLEANUP_DEFAULT still removes the local tmp dir on exit, but
        logs a deprecation warning."""
        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob', stderr)
            with LocalMRJobRunner(cleanup=CLEANUP_DEFAULT) as runner:
                self.local_tmp_dir = runner._get_local_tmp_dir()
                assert os.path.exists(self.local_tmp_dir)

            # tmp dir should be gone once the runner exits
            assert_equal(os.path.exists(self.local_tmp_dir), False)
            self.local_tmp_dir = None
            assert_in('deprecated', stderr.getvalue())
コード例 #35
0
ファイル: test_option_store.py プロジェクト: gaker/mrjob
    def test_runner_option_store(self):
        """Deprecated base_tmp_dir is renamed to local_tmp_dir with a
        warning."""
        log_output = StringIO()
        with no_handlers_for_logger('mrjob.conf'):
            log_to_stream('mrjob.conf', log_output)
            opts = RunnerOptionStore(
                'inline', dict(base_tmp_dir='/scratch'), [])

            self.assertEqual(opts['local_tmp_dir'], '/scratch')
            self.assertNotIn('base_tmp_dir', opts)
            self.assertIn('Deprecated option base_tmp_dir has been renamed'
                          ' to local_tmp_dir', log_output.getvalue())
コード例 #36
0
ファイル: test_option_store.py プロジェクト: irskep/mrjob
    def test_runner_option_store(self):
        """Deprecated base_tmp_dir is renamed to local_tmp_dir with a
        warning."""
        stderr = StringIO()
        with no_handlers_for_logger('mrjob.conf'):
            log_to_stream('mrjob.conf', stderr)
            opts = RunnerOptionStore(
                'inline', dict(base_tmp_dir='/scratch'), [])

            self.assertEqual(opts['local_tmp_dir'], '/scratch')
            self.assertNotIn('base_tmp_dir', opts)
            self.assertIn('Deprecated option base_tmp_dir has been renamed'
                          ' to local_tmp_dir', stderr.getvalue())
コード例 #37
0
ファイル: test_option_store.py プロジェクト: trisch-me/mrjob
    def test_recurse(self):
        """A conf file that includes itself is detected and reported
        instead of looping forever."""
        path = os.path.join(self.tmp_dir, 'LOL.conf')
        recurse_conf = dict(include=path)
        with open(path, 'w') as f:
            dump_mrjob_conf(recurse_conf, f)

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.conf', stderr)
            RunnerOptionStore('inline', {}, [path])
            self.assertIn('%s tries to recursively include %s!' % (path, path),
                          stderr.getvalue())
コード例 #38
0
ファイル: test_parse.py プロジェクト: BeeswaxIO/mrjob
    def test_indentation_is_required(self):
        """A counter line without extra indentation is misread as a group
        header, and the parser logs a complaint."""
        lines = ["File System Counters", "   FILE: Number of bytes read=8"]

        with no_handlers_for_logger("mrjob.logs.parse"):
            log_output = StringIO()
            log_to_stream("mrjob.logs.parse", log_output)

            # nothing parses: the counter line is taken as a group name
            self.assertEqual(_parse_indented_counters(lines), {})

            # and a warning should have been logged about it
            self.assertNotEqual(log_output.getvalue(), "")
コード例 #39
0
ファイル: test_runner.py プロジェクト: nyccto/mrjob
    def test_recurse(self):
        """A conf file that includes itself is detected and reported
        instead of looping forever."""
        path = os.path.join(self.tmp_dir, 'LOL.conf')
        recurse_conf = dict(include=path)
        with open(path, 'w') as f:
            dump_mrjob_conf(recurse_conf, f)

        stderr = StringIO()
        with no_handlers_for_logger():
            log_to_stream('mrjob.conf', stderr)
            InlineMRJobRunner(conf_path=path)
            self.assertIn('%s tries to recursively include %s!' % (path, path),
                          stderr.getvalue())
コード例 #40
0
 def test_attrs_should_be_classes(self):
     """Protocol attributes set to instances still work, but each one
     logs a 'should be a class' warning."""
     with no_handlers_for_logger('mrjob.job'):
         log_output = StringIO()
         log_to_stream('mrjob.job', log_output)
         job = self.StrangeJob()
         for protocol in (job.input_protocol(),
                          job.internal_protocol(),
                          job.output_protocol()):
             self.assertIsInstance(protocol, JSONProtocol)
         logged = log_output.getvalue()
         self.assertIn('INPUT_PROTOCOL should be a class', logged)
         self.assertIn('INTERNAL_PROTOCOL should be a class', logged)
         self.assertIn('OUTPUT_PROTOCOL should be a class', logged)
コード例 #41
0
ファイル: test_job.py プロジェクト: okomestudio/mrjob
 def test_attrs_should_be_classes(self):
     """Protocol attributes set to instances still work, but each one
     logs a 'should be a class' warning."""
     with no_handlers_for_logger('mrjob.job'):
         stderr = StringIO()
         log_to_stream('mrjob.job', stderr)
         job = self.StrangeJob()
         self.assertIsInstance(job.input_protocol(), JSONProtocol)
         self.assertIsInstance(job.internal_protocol(), JSONProtocol)
         self.assertIsInstance(job.output_protocol(), JSONProtocol)
         logs = stderr.getvalue()
         self.assertIn('INPUT_PROTOCOL should be a class', logs)
         self.assertIn('INTERNAL_PROTOCOL should be a class', logs)
         self.assertIn('OUTPUT_PROTOCOL should be a class', logs)
コード例 #42
0
ファイル: test_parse.py プロジェクト: BeeswaxIO/mrjob
    def test_with_header(self):
        """A 'Counters: N' header line is tolerated and the indented
        group/counter lines below it parse normally, with no warnings."""
        lines = ["Counters: 1", "  File System Counters", "    FILE: Number of bytes read=86"]

        with no_handlers_for_logger("mrjob.logs.parse"):
            stderr = StringIO()
            log_to_stream("mrjob.logs.parse", stderr)

            self.assertEqual(
                _parse_indented_counters(lines), {"File System Counters": {"FILE: Number of bytes read": 86}}
            )

            # header shouldn't freak it out
            self.assertEqual(stderr.getvalue(), "")
コード例 #43
0
ファイル: test_wrap.py プロジェクト: wadere/mrjob
    def _test_recoverable_error(self, ex):
        """Helper: ls() raising *ex* mid-listing should log a "couldn't
        ls()" warning and still yield the paths seen before the error."""
        # the mock fs will yield the path, then raise ex
        self.mock_paths = ['/path/to/logs/oak', ex]

        with no_handlers_for_logger('mrjob.logs.wrap'):
            stderr = StringIO()
            log_to_stream('mrjob.logs.wrap', stderr)

            self.assertEqual(self._ls_logs([['/path/to/logs']]),
                             [dict(path='/path/to/logs/oak')])

            self.mock_fs.ls.assert_called_once_with('/path/to/logs')

            self.assertIn("couldn't ls() /path/to/logs", stderr.getvalue())
コード例 #44
0
ファイル: test_runner.py プロジェクト: irskep/mrjob
    def test_passthrough(self):
        """Filesystem methods accessed via the runner are passed through to
        runner.fs, each with a deprecation warning."""
        runner = InlineMRJobRunner()

        with no_handlers_for_logger("mrjob.runner"):
            stderr = StringIO()
            log_to_stream("mrjob.runner", stderr)

            self.assertEqual(runner.ls, runner.fs.ls)
            # no special rules for underscore methods
            self.assertEqual(runner._cat_file, runner.fs._cat_file)

            self.assertIn("deprecated: call InlineMRJobRunner.fs.ls() directly", stderr.getvalue())
            self.assertIn("deprecated: call InlineMRJobRunner.fs._cat_file() directly", stderr.getvalue())
コード例 #45
0
ファイル: ic_bfs_eval.py プロジェクト: joeloren/ic-eval
def main():
    """Entry point: parse CLI arguments and run IC influence estimation.

    Reads the seeds file, builds the link server, and runs
    ``EstimateInfluence`` once per seed set, either sequentially ("seq")
    or via the alternate BFS method.
    """
    global output_file
    global total_steps_cap
    global csv_file
    global generate_database
    global edge_prob_type
    global sql_mode
    global seeds_file
    global probs

    parameters = parser.parse_args()
    # unpack the frequently-used options into locals/globals
    csv_file = parameters.csv
    db_name = parameters.dataset
    generate_database = parameters.generate_database
    edge_prob_type = parameters.edge_prob_type
    sql_mode = parameters.sql
    res_fname = parameters.res_fname
    seeds = parameters.seeds
    bfs_method = parameters.bfs_method
    cores = parameters.cores
    output_mode = parameters.output_mode
    scale = parameters.scale

    if bfs_method == "emr":
        print_out("setting up logging", output_mode)
        log_to_stream()
        print_out("done", output_mode)
    print_out("Evaluation algorithm. Dataset: %s" % (db_name), output_mode)
    link_server = LinkServerCP(db_name, undirected=parameters.undirected)

    # BUG FIX: seeds_sets was only loaded inside the "seq" branch, so the
    # other branch raised NameError on `seeds_sets` and `i`.  Load the
    # seed sets once, up front (and close the file promptly), so both
    # branches can iterate over them.
    print_out("Loading seeds set", output_mode)
    with open(seeds, "r") as seeds_f:
        seeds_sets = cp.load(seeds_f)

    if bfs_method == "seq":
        for i in xrange(len(seeds_sets)):
            EstimateInfluence(
                link_server,
                bfs_method,
                parameters.tau_scale,
                seeds_sets[i],
                res_fname,
                cores,
                output_mode,
                scale,
                parameters.output_results,
                init_tau=parameters.init_tau,
                iter_samples=parameters.iter_samples,
            )
    else:
        # non-sequential methods take a shorter argument list
        for i in xrange(len(seeds_sets)):
            EstimateInfluence(
                link_server, bfs_method, seeds_sets[i], res_fname, cores, output_mode
            )
コード例 #46
0
ファイル: test_runner.py プロジェクト: irskep/mrjob
    def test_pass_through_fields(self):
        """Field access on the runner is forwarded to its fs object."""
        # TODO: currently can't initialize HadoopRunner without setting these
        runner = HadoopJobRunner(
            hadoop_bin="hadoooooooooop", hadoop_home="kansas", hadoop_streaming_jar="streaming.jar"
        )

        with no_handlers_for_logger("mrjob.runner"):
            captured = StringIO()
            log_to_stream("mrjob.runner", captured)

            self.assertEqual(runner._hadoop_bin, runner.fs._hadoop_bin)

            # fields (unlike methods) get an "access ... directly" warning
            self.assertIn("deprecated: access HadoopJobRunner.fs._hadoop_bin directly", captured.getvalue())
コード例 #47
0
    def test_io_error(self):
        """An IOError from ls() is swallowed and logged, yielding no paths."""
        self.mock_paths = [IOError()]

        with no_handlers_for_logger('mrjob.logs.ls'):
            log_output = StringIO()
            log_to_stream('mrjob.logs.ls', log_output)

            paths = list(_ls_logs(self.mock_fs, '/path/to/logs'))
            self.assertEqual(paths, [])

            self.mock_fs.ls.assert_called_once_with('/path/to/logs')

            # the failure should be logged, not raised
            self.assertIn("couldn't ls() /path/to/logs", log_output.getvalue())
コード例 #48
0
    def test_deprecated_alias(self):
        """buffer_iterator_to_line_iterator still works but warns of rename."""
        chunks = [b'The quick\nbrown fox\nju',
                  b'mped over\nthe lazy\ndog',
                  b's.\n']

        with no_handlers_for_logger('mrjob.util'):
            log_output = StringIO()
            log_to_stream('mrjob.util', log_output)

            # lines split across chunk boundaries are reassembled
            lines = list(buffer_iterator_to_line_iterator(iter(chunks)))
            self.assertEqual(
                lines,
                [b'The quick\n', b'brown fox\n', b'jumped over\n',
                 b'the lazy\n', b'dogs.\n'])

            self.assertIn('has been renamed', log_output.getvalue())
コード例 #49
0
ファイル: test_util.py プロジェクト: anirudhreddy92/mrjob
    def test_deprecated_alias(self):
        """The old buffer_iterator_to_line_iterator name warns but still works."""
        with no_handlers_for_logger('mrjob.util'):
            log_output = StringIO()
            log_to_stream('mrjob.util', log_output)

            data = b'The quick\nbrown fox\njumped over\nthe lazy\ndogs.\n'
            result = list(buffer_iterator_to_line_iterator(iter([data])))
            self.assertEqual(
                result,
                [b'The quick\n', b'brown fox\n', b'jumped over\n',
                 b'the lazy\n', b'dogs.\n'])

            # using the deprecated alias should log a rename warning
            self.assertIn('has been renamed', log_output.getvalue())
コード例 #50
0
ファイル: test_ls.py プロジェクト: BeeswaxIO/mrjob
    def test_io_error(self):
        """ls() raising IOError results in an empty listing plus a warning."""
        self.mock_paths = [IOError()]

        with no_handlers_for_logger('mrjob.logs.ls'):
            captured = StringIO()
            log_to_stream('mrjob.logs.ls', captured)

            # no paths come back, and the error does not propagate
            self.assertEqual(list(_ls_logs(self.mock_fs, '/path/to/logs')), [])
            self.mock_fs.ls.assert_called_once_with('/path/to/logs')
            self.assertIn("couldn't ls() /path/to/logs", captured.getvalue())
コード例 #51
0
ファイル: test_step.py プロジェクト: pradeep1288/mrjob
    def test_indentation_is_required(self):
        """A counter line without deeper indentation is not parsed as a counter."""
        counter_lines = [
            'File System Counters',
            '   FILE: Number of bytes read=8',
        ]

        with no_handlers_for_logger('mrjob.logs.step'):
            log_output = StringIO()
            log_to_stream('mrjob.logs.step', log_output)

            # the mis-indented counter line is interpreted as a group
            # name, so nothing is parsed
            self.assertEqual(_parse_indented_counters(counter_lines), {})

            # the parser should complain about it
            self.assertNotEqual(log_output.getvalue(), '')
コード例 #52
0
def main():
    """Command-line entry point: parse options, set up logging, print report."""
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    # this tool accepts options only, never positional arguments
    if args:
        option_parser.error('takes no arguments')

    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)
    # suppress No handlers could be found for logger "boto" message
    log_to_stream(name='boto', level=logging.CRITICAL)

    print_report(options)
コード例 #53
0
ファイル: audit_usage.py プロジェクト: AntonKast/mrjob
def main():
    """Entry point for the audit tool: parse args, configure logging, report."""
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    # no positional arguments are accepted
    if args:
        option_parser.error('takes no arguments')

    # log to stderr unless --quiet was given
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)
    # suppress No handlers could be found for logger "boto" message
    log_to_stream(name='boto', level=logging.CRITICAL)

    print_report(options)
コード例 #54
0
ファイル: launch.py プロジェクト: Yelp/mrjob
    def set_up_logging(cls, quiet=False, verbose=False, stream=None):
        """Set up logging when running from the command line. This is also
        used by the various command-line utilities.

        :param bool quiet: If true, don't log. Overrides *verbose*.
        :param bool verbose: If true, set log level to ``DEBUG`` (default is
                             ``INFO``)
        :param stream: Stream to log to (default is ``sys.stderr``)
        """
        # configure both our package logger and the script's own logger
        for logger_name in ('mrjob', '__main__'):
            if quiet:
                log_to_null(name=logger_name)
            else:
                log_to_stream(name=logger_name, debug=verbose, stream=stream)
コード例 #55
0
    def set_up_logging(cls, quiet=False, verbose=False, stream=None):
        """Set up logging when running from the command line. This is also
        used by the various command-line utilities.

        :param bool quiet: If true, don't log. Overrides *verbose*.
        :param bool verbose: If true, set log level to ``DEBUG`` (default is
                             ``INFO``)
        :param stream: Stream to log to (default is ``sys.stderr``)
        """
        if quiet:
            # silence both the package logger and the script's own logger
            log_to_null(name='mrjob')
            log_to_null(name='__main__')
            return

        log_to_stream(name='mrjob', debug=verbose, stream=stream)
        log_to_stream(name='__main__', debug=verbose, stream=stream)
コード例 #56
0
ファイル: test_runner.py プロジェクト: mtai/mrjob
    def test_pass_through_fields(self):
        """Fields on the runner resolve to the fs object's fields."""
        # TODO: currently can't initialize HadoopRunner without setting these
        runner = HadoopJobRunner(hadoop_bin='hadoooooooooop',
                                 hadoop_home='kansas',
                                 hadoop_streaming_jar='streaming.jar')

        with no_handlers_for_logger('mrjob.runner'):
            captured = StringIO()
            log_to_stream('mrjob.runner', captured)

            self.assertEqual(runner._hadoop_bin, runner.fs._hadoop_bin)

            # non-function attributes get an "access ... directly" warning
            self.assertIn(
                'deprecated: access HadoopJobRunner.fs._hadoop_bin directly',
                captured.getvalue())
コード例 #57
0
ファイル: test_runner.py プロジェクト: Milkigit/mrjob
    def test_passthrough(self):
        """Runner methods resolve to fs methods and emit deprecation warnings."""
        runner = InlineMRJobRunner()

        with no_handlers_for_logger('mrjob.runner'):
            captured = StringIO()
            log_to_stream('mrjob.runner', captured)

            self.assertEqual(runner.ls, runner.fs.ls)
            # underscore methods are proxied just like public ones
            self.assertEqual(runner._cat_file, runner.fs._cat_file)

            output = captured.getvalue()
            self.assertIn(
                'deprecated: call InlineMRJobRunner.fs.ls() directly', output)
            self.assertIn(
                'deprecated: call InlineMRJobRunner.fs._cat_file() directly',
                output)
コード例 #58
0
def main():
    """CLI entry point: find and terminate idle job flows per the options."""
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    # positional arguments are not accepted
    if args:
        option_parser.error('takes no arguments')

    # set up logging unless --quiet was given
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)
    # suppress No handlers could be found for logger "boto" message
    log_to_stream(name='boto', level=logging.CRITICAL)

    inspect_and_maybe_terminate_job_flows(
        conf_path=options.conf_path,
        max_hours_idle=options.max_hours_idle,
        dry_run=options.dry_run,
        now=datetime.utcnow())
コード例 #59
0
    def set_up_logging(cls, quiet=False, verbose=False, stream=None):
        """Set up logging when running from the command line. This is also
        used by the various command-line utilities.

        :param bool quiet: If true, don't log. Overrides *verbose*.
        :param bool verbose: If true, set log level to ``DEBUG`` (default is
                             ``INFO``)
        :param bool stream: Stream to log to (default is ``sys.stderr``)

        This will also set up a null log handler for boto3, so we don't get
        warnings if boto3 tries to log about throttling and whatnot.
        """
        if quiet:
            log_to_null(name='mrjob')
            log_to_null(name='__main__')
        else:
            log_to_stream(name='mrjob', debug=verbose, stream=stream)
            log_to_stream(name='__main__', debug=verbose, stream=stream)