def _start_job(rule, settings, urls=None):
    """Create an InfernoJob for *rule* and attempt to start it.

    Returns a ``(InfernoJob, DiscoJob)`` tuple. The second element is
    ``None`` when the job could not be started (e.g. not enough blobs).
    """
    inferno_job = InfernoJob(rule, settings, urls)
    disco_job = inferno_job.start()
    return inferno_job, disco_job
def setUp(self):
    """Build an archiving InfernoJob wired to fake Disco/DDFS backends."""
    day_start = date(2011, 11, 12)
    settings = InfernoSettings(day_range=2, day_start=day_start)
    rule = InfernoRule(
        name='some_rule_name',
        archive=True,
        archive_tag_prefix='archived',
        max_blobs=self.MAX_BLOBS,
        source_tags=['incoming:data:chunk'])
    self.job = InfernoJob(rule, settings)
    # Replace the real backends with test doubles.
    self.job.disco = Disco()
    self.job.ddfs = DDFS()
def setUp(self):
    """Prepare an archive-mode InfernoJob whose Disco/DDFS are fakes."""
    rule = InfernoRule(
        source_tags=['incoming:data:chunk'],
        archive_tag_prefix='archived',
        archive=True,
        max_blobs=self.MAX_BLOBS,
        name='some_rule_name')
    job = InfernoJob(
        rule, InfernoSettings(day_range=2, day_start=date(2011, 11, 12)))
    job.disco = Disco()
    job.ddfs = DDFS()
    self.job = job
class TestJob(object):
    """Tests for InfernoJob internals against stubbed Disco/DDFS backends."""

    # Archiver limits shared by the assertions below.
    MAX_BLOBS = 1000
    ARCHIVE_PREFIX = 'archived'

    def setUp(self):
        """Build an archiving InfernoJob wired to fake Disco/DDFS backends."""
        settings = InfernoSettings(day_range=2, day_start=date(2011, 11, 12))
        rule = InfernoRule(
            archive=True,
            max_blobs=self.MAX_BLOBS,
            name='some_rule_name',
            archive_tag_prefix='archived',
            source_tags=['incoming:data:chunk'])
        self.job = InfernoJob(rule, settings)
        self.job.disco = Disco()
        self.job.ddfs = DDFS()

    def test_start_not_enough_blobs(self):
        # start() returns None when min_blobs cannot be satisfied
        self.job.rule.min_blobs = 1000
        job = self.job.start()
        eq_(job, None)

    def test_determine_job_blobs(self):
        # day_range=2 from day_start 2011-11-12 yields two dated tags
        expected_tags = [
            'incoming:data:chunk:2011-11-12',
            'incoming:data:chunk:2011-11-11']
        expected_blobs = [
            ('/b12.1', '/b12.2', '/b12.3'),
            ('/b11.1', '/b11.2', '/b11.3')]
        archiver = self.job._determine_job_blobs()
        # check that the archiver was created correctly
        # (was wrapped in a no-op try/except that re-raised; removed)
        eq_(archiver.max_blobs, self.MAX_BLOBS)
        eq_(archiver.archive_mode, True)
        eq_(archiver.archive_prefix, self.ARCHIVE_PREFIX)
        # check that it found the correct tags and blobs
        eq_(archiver.tags, expected_tags)
        eq_(archiver.job_blobs, expected_blobs)

    def test_archive_tags(self):
        # there should be no archived tags before calling archive
        archive_prefix = 'archived:data:chunk'
        archiver = self.job._determine_job_blobs()
        archived = archiver.ddfs.list(archive_prefix)
        eq_(archived, [])
        # should not archive & change state since archive mode is false
        archiver.archive_mode = False
        self.job._archive_tags(archiver)
        archived = archiver.ddfs.list(archive_prefix)
        eq_(archived, [])
        # should archive & change state since archive mode is true
        archiver.archive_mode = True
        self.job._archive_tags(archiver)
        archived = archiver.ddfs.list(archive_prefix)
        expected = [
            'archived:data:chunk:2011-11-11',
            'archived:data:chunk:2011-11-12']
        eq_(archived, expected)

    def test_get_job_results_no_results(self):
        urls = []
        actual = self.job._get_job_results(urls)
        # no-op try/except re-raise removed; assertion stands on its own
        eq_(list(actual), [])

    def test_process_results_no_results(self):
        results = []
        self.job._process_results(results, 'some_job_id')
        #eq_(self.job.current_stage, JOB_PROCESS)

    def test_purge(self):
        # should purge if there's no 'no_purge' setting
        ok_('no_purge' not in self.job.settings)
        self.job._purge('some_job_name')
        # should not purge & change state
        # NOTE(review): 'no_purge' is never set True before this call —
        # verify the intent against _purge's implementation.
        self.job._purge('some_job_name')
        # should purge & change state
        self.job.settings['no_purge'] = False
        self.job._purge('some_job_name')

    def test_profile(self):
        # should not profile if there's no 'profile' setting
        ok_('profile' not in self.job.settings)
        self.job._profile(Mock())
        # should not profile & change state
        # (key was misspelled 'profle', which left 'profile' unset and
        # never exercised the explicit-False branch)
        self.job.settings['profile'] = False
        self.job._profile(Mock())
        # should profile & change state
        self.job.settings['profile'] = True
        self.job._profile(Mock())

    def test_tag_results(self):
        # should not tag results & change state
        self.job.settings['result_tag'] = None
        self.job._tag_results('some_job_name')
        # should tag results & change state
        before = self.job.ddfs.list('some_result_tag')
        eq_(before, [])

    def test_enough_blobs(self):
        # equal
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=100), True)
        # more than
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=101), True)
        # less than
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=99), False)
        # less than, but force mode is enabled
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = True
        eq_(self.job._enough_blobs(blob_count=99), True)

    def test_str(self):
        eq_(str(self.job), '<InfernoJob for: some_rule_name>')
def setUp(self):
    """Create a bare job plus the pid directory used by the pid helpers."""
    self.settings = InfernoSettings()
    self._make_temp_pid_dir()
    rule = InfernoRule(name='some_rule_name')
    self.job = InfernoJob(rule, {}, Params())
    self.pid_dir = pid.pid_dir(self.settings)
class TestJob(object):
    """Tests for InfernoJob internals against stubbed Disco/DDFS backends."""

    # Archiver limits shared by the assertions below.
    MAX_BLOBS = 1000
    ARCHIVE_PREFIX = 'archived'

    def setUp(self):
        """Build an archiving InfernoJob wired to fake Disco/DDFS backends."""
        settings = InfernoSettings(day_range=2, day_start=date(2011, 11, 12))
        rule = InfernoRule(
            archive=True,
            max_blobs=self.MAX_BLOBS,
            name='some_rule_name',
            archive_tag_prefix='archived',
            source_tags=['incoming:data:chunk'])
        self.job = InfernoJob(rule, settings)
        self.job.disco = Disco()
        self.job.ddfs = DDFS()

    def test_start_not_enough_blobs(self):
        # start() returns None when min_blobs cannot be satisfied
        self.job.rule.min_blobs = 1000
        job = self.job.start()
        eq_(job, None)

    def test_determine_job_blobs(self):
        # day_range=2 from day_start 2011-11-12 yields two dated tags
        expected_tags = [
            'incoming:data:chunk:2011-11-12',
            'incoming:data:chunk:2011-11-11']
        expected_blobs = [
            ('/b12.1', '/b12.2', '/b12.3'),
            ('/b11.1', '/b11.2', '/b11.3')]
        archiver = self.job._determine_job_blobs()
        # check that the archiver was created correctly
        # (was wrapped in a no-op try/except that re-raised; removed)
        eq_(archiver.max_blobs, self.MAX_BLOBS)
        eq_(archiver.archive_mode, True)
        eq_(archiver.archive_prefix, self.ARCHIVE_PREFIX)
        # check that it found the correct tags and blobs
        eq_(archiver.tags, expected_tags)
        eq_(archiver.job_blobs, expected_blobs)

    def test_archive_tags(self):
        # there should be no archived tags before calling archive
        archive_prefix = 'archived:data:chunk'
        archiver = self.job._determine_job_blobs()
        archived = archiver.ddfs.list(archive_prefix)
        eq_(archived, [])
        # should not archive & change state since archive mode is false
        archiver.archive_mode = False
        self.job._archive_tags(archiver)
        archived = archiver.ddfs.list(archive_prefix)
        eq_(archived, [])
        # should archive & change state since archive mode is true
        archiver.archive_mode = True
        self.job._archive_tags(archiver)
        archived = archiver.ddfs.list(archive_prefix)
        expected = [
            'archived:data:chunk:2011-11-11',
            'archived:data:chunk:2011-11-12']
        eq_(archived, expected)

    def test_get_job_results_no_results(self):
        urls = []
        actual = self.job._get_job_results(urls)
        # no-op try/except re-raise removed; assertion stands on its own
        eq_(list(actual), [])

    def test_process_results_no_results(self):
        results = []
        self.job._process_results(results, 'some_job_id')
        #eq_(self.job.current_stage, JOB_PROCESS)

    def test_purge(self):
        # should purge if there's no 'no_purge' setting
        ok_('no_purge' not in self.job.settings)
        self.job._purge('some_job_name')
        # should not purge & change state
        # NOTE(review): 'no_purge' is never set True before this call —
        # verify the intent against _purge's implementation.
        self.job._purge('some_job_name')
        # should purge & change state
        self.job.settings['no_purge'] = False
        self.job._purge('some_job_name')

    def test_profile(self):
        # should not profile if there's no 'profile' setting
        ok_('profile' not in self.job.settings)
        self.job._profile(Mock())
        # should not profile & change state
        # (key was misspelled 'profle', which left 'profile' unset and
        # never exercised the explicit-False branch)
        self.job.settings['profile'] = False
        self.job._profile(Mock())
        # should profile & change state
        self.job.settings['profile'] = True
        self.job._profile(Mock())

    def test_tag_results(self):
        # should not tag results & change state
        self.job.settings['result_tag'] = None
        self.job._tag_results('some_job_name')
        # should tag results & change state
        before = self.job.ddfs.list('some_result_tag')
        eq_(before, [])

    def test_enough_blobs(self):
        # equal
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=100), True)
        # more than
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=101), True)
        # less than
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=99), False)
        # less than, but force mode is enabled
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = True
        eq_(self.job._enough_blobs(blob_count=99), True)

    def test_str(self):
        eq_(str(self.job), '<InfernoJob for: some_rule_name>')
def main(argv=sys.argv): options, parser = _get_options(argv[1:]) settings = _get_settings(options) if options['example_rules']: try: os.mkdir(options['example_rules']) here = os.path.dirname(__file__) src_dir = os.path.join(here, '..', 'example_rules') src_dir = os.path.abspath(src_dir) dst_dir = os.path.abspath(options['example_rules']) for name in os.listdir(src_dir): if name.endswith('.py'): src = os.path.join(src_dir, name) dst = os.path.join(dst_dir, name) shutil.copy(src, dst) print '\n\tCreated example rules dir:\n\n\t\t%s' % dst_dir for name in os.listdir(dst_dir): print '\t\t\t', name except Exception as e: print 'Error creating example rules dir %r' % (e) finally: return _setup_logging(settings) for path in settings.get('extra_python_paths'): sys.path.insert(0, path) if options['process_results']: settings['no_purge'] = True rules_dir = options.get('rules_directory') if not rules_dir: rules_dir = settings.get('rules_directory') try: rule_name = options['process_results'].split('@')[0] job_name = options['process_results'].split('.')[1] rule = get_rules_by_name(rule_name, rules_dir, immediate=True)[0] job = InfernoJob(rule, settings) status, results = job.disco.results(job_name) if status == 'ready': if job.rule.rule_init_function: job.rule.rule_init_function(job.params) rule.result_processor(rule.result_iterator(results), params=job.params, job_id=job_name) except Exception as e: import traceback trace = traceback.format_exc(15) log.error(trace) log.error("Error processing results for job: %s %s" % (options['process_results'], e)) raise e elif options['process_map']: settings['no_purge'] = True rules_dir = options.get('rules_directory') if not rules_dir: rules_dir = settings.get('rules_directory') try: rule_name = options['process_map'].split('@')[0] job_name = options['process_map'].split('.')[1] rule = get_rules_by_name(rule_name, rules_dir, immediate=True)[0] rule.map_function = None rule.source_tags = [] disco, ddfs = 
get_disco_handle(settings.get('server')) rule.source_urls = disco.mapresults(job_name) job = InfernoJob(rule, settings) if job.start(): job.wait() except Exception as e: import traceback trace = traceback.format_exc(15) log.error(trace) log.error("Error processing map results for job: %s %s" % (options['process_map'], e)) raise e elif options['immediate_rule']: # run inferno in 'immediate' mode settings['no_purge'] = True setproctitle('inferno - immediate.%s' % options['immediate_rule']) immed_rule = settings.get('immediate_rule') rules_dir = settings.get('rules_directory') rules = get_rules_by_name(immed_rule, rules_dir, immediate=True) try: for rule in rules: execute_rule(rule, settings) except Exception as e: import traceback trace = traceback.format_exc(15) log.error('Job failed: %s' % e.message) log.error(trace) exit(1) elif options['run_daemon']: # run inferno in 'daemon' mode from inferno.lib.daemon import InfernoDaemon setproctitle('inferno - master') InfernoDaemon(settings).start() else: # Display help when no options specified parser.print_help()