Example #1
def _start_job(rule, settings, urls=None):
    """Start a new job for an InfernoRule

    Note that the output of this function is a tuple of (InfernoJob, DiscoJob).
    If the InfernoJob fails to start for some reason (e.g. not enough blobs),
    the DiscoJob will be None.
    """
    job = InfernoJob(rule, settings, urls)
    return job, job.start()
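
A minimal usage sketch of _start_job (the import paths follow the inferno source layout and, like the rule parameters, are assumptions here):

from inferno.lib.rule import InfernoRule          # assumed import path
from inferno.lib.settings import InfernoSettings  # assumed import path

# illustrative rule and default settings; the names are hypothetical
rule = InfernoRule(name='some_rule_name', source_tags=['incoming:data:chunk'])
settings = InfernoSettings()

job, disco_job = _start_job(rule, settings)
if disco_job is None:
    # the job did not start (e.g. not enough blobs), so there is nothing to wait on
    pass
else:
    job.wait()  # block until the underlying Disco job finishes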
Example #2
File: test_job.py Project: pooya/inferno
def setUp(self):
    settings = InfernoSettings(day_range=2, day_start=date(2011, 11, 12))
    rule = InfernoRule(archive=True,
                       max_blobs=self.MAX_BLOBS,
                       name='some_rule_name',
                       archive_tag_prefix='archived',
                       source_tags=['incoming:data:chunk'])
    self.job = InfernoJob(rule, settings)
    self.job.disco = Disco()
    self.job.ddfs = DDFS()
Example #3
File: test_job.py Project: 0scarLi/inferno
class TestJob(object):

    MAX_BLOBS = 1000
    ARCHIVE_PREFIX = 'archived'

    def setUp(self):
        settings = InfernoSettings(day_range=2, day_start=date(2011, 11, 12))
        rule = InfernoRule(
            archive=True,
            max_blobs=self.MAX_BLOBS,
            name='some_rule_name',
            archive_tag_prefix='archived',
            source_tags=['incoming:data:chunk'])
        self.job = InfernoJob(rule, settings)
        self.job.disco = Disco()
        self.job.ddfs = DDFS()

    def test_start_not_enough_blobs(self):
        self.job.rule.min_blobs = 1000
        job = self.job.start()
        eq_(job, None)

    def test_determine_job_blobs(self):
        expected_tags = [
            'incoming:data:chunk:2011-11-12',
            'incoming:data:chunk:2011-11-11']
        expected_blobs = [
            ('/b12.1', '/b12.2', '/b12.3'),
            ('/b11.1', '/b11.2', '/b11.3')]
        archiver = self.job._determine_job_blobs()

        # check that the archiver was created correctly
        eq_(archiver.max_blobs, self.MAX_BLOBS)
        eq_(archiver.archive_mode, True)
        eq_(archiver.archive_prefix, self.ARCHIVE_PREFIX)

        # check that it found the correct tags and blobs
        eq_(archiver.tags, expected_tags)
        eq_(archiver.job_blobs, expected_blobs)

    def test_archive_tags(self):
        # there should be no archived tags before calling archive
        archive_prefix = 'archived:data:chunk'
        archiver = self.job._determine_job_blobs()
        archived = archiver.ddfs.list(archive_prefix)
        eq_(archived, [])

        # should not archive & change state since archive mode is false
        archiver.archive_mode = False
        self.job._archive_tags(archiver)
        archived = archiver.ddfs.list(archive_prefix)
        eq_(archived, [])

        # should archive & change state since archive mode is true
        archiver.archive_mode = True
        self.job._archive_tags(archiver)
        archived = archiver.ddfs.list(archive_prefix)
        expected = [
            'archived:data:chunk:2011-11-11',
            'archived:data:chunk:2011-11-12']
        eq_(archived, expected)

    def test_get_job_results_no_results(self):
        urls = []
        actual = self.job._get_job_results(urls)
        eq_(list(actual), [])

    def test_process_results_no_results(self):
        results = []
        self.job._process_results(results, 'some_job_id')
        #eq_(self.job.current_stage, JOB_PROCESS)

    def test_purge(self):
        # should purge if there's no 'no_purge' setting
        ok_('no_purge' not in self.job.settings)
        self.job._purge('some_job_name')

        # should not purge & change state
        self.job.settings['no_purge'] = True
        self.job._purge('some_job_name')

        # should purge & change state
        self.job.settings['no_purge'] = False
        self.job._purge('some_job_name')

    def test_profile(self):
        # should not profile if there's no 'profile' setting
        ok_('profile' not in self.job.settings)
        self.job._profile(Mock())

        # should not profile & change state
        self.job.settings['profile'] = False
        self.job._profile(Mock())

        # should profile & change state
        self.job.settings['profile'] = True
        self.job._profile(Mock())

    def test_tag_results(self):
        # should not tag results & change state
        self.job.settings['result_tag'] = None
        self.job._tag_results('some_job_name')

        # should tag results & change state
        before = self.job.ddfs.list('some_result_tag')
        eq_(before, [])

    def test_enough_blobs(self):
        # equal
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=100), True)

        # more than
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=101), True)

        # less than
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = False
        eq_(self.job._enough_blobs(blob_count=99), False)

        # less than, but force mode is enabled
        self.job.rule.min_blobs = 100
        self.job.settings['force'] = True
        eq_(self.job._enough_blobs(blob_count=99), True)

    def test_str(self):
        eq_(str(self.job), '<InfernoJob for: some_rule_name>')
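
The eq_ and ok_ assertions used throughout this class are presumably the nose.tools helpers, and Disco and DDFS here are test doubles rather than live clients. A sketch of the module header such a test file would need (the fake-client import is hypothetical, and the inferno import paths are assumptions):

from datetime import date

from mock import Mock
from nose.tools import eq_, ok_

from inferno.lib.job import InfernoJob            # assumed import path
from inferno.lib.rule import InfernoRule          # assumed import path
from inferno.lib.settings import InfernoSettings  # assumed import path
from test.mocks import Disco, DDFS                # hypothetical in-memory fakes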
Example #4
File: test_pid.py Project: pooya/inferno
def setUp(self):
    self.settings = InfernoSettings()
    self._make_temp_pid_dir()
    self.job = InfernoJob(InfernoRule(name='some_rule_name'), {}, Params())
    self.pid_dir = pid.pid_dir(self.settings)
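
A test built on this fixture might simply check that the directory resolved by pid.pid_dir exists; a sketch, assuming _make_temp_pid_dir creates that directory and that os and nose.tools.ok_ are imported at module level:

def test_pid_dir_exists(self):
    # the fixture's temp dir should be where pid files get written
    ok_(os.path.isdir(self.pid_dir))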
Example #5
File: run.py Project: chango/inferno
def main(argv=sys.argv):
    options, parser = _get_options(argv[1:])
    settings = _get_settings(options)

    if options['example_rules']:
        try:
            os.mkdir(options['example_rules'])
            here = os.path.dirname(__file__)
            src_dir = os.path.join(here, '..', 'example_rules')
            src_dir = os.path.abspath(src_dir)
            dst_dir = os.path.abspath(options['example_rules'])
            for name in os.listdir(src_dir):
                if name.endswith('.py'):
                    src = os.path.join(src_dir, name)
                    dst = os.path.join(dst_dir, name)
                    shutil.copy(src, dst)
            print '\n\tCreated example rules dir:\n\n\t\t%s' % dst_dir
            for name in os.listdir(dst_dir):
                print '\t\t\t', name
        except Exception as e:
            print 'Error creating example rules dir %r' % (e)
        finally:
            return

    _setup_logging(settings)

    for path in settings.get('extra_python_paths'):
        sys.path.insert(0, path)

    if options['process_results']:
        settings['no_purge'] = True
        rules_dir = options.get('rules_directory')
        if not rules_dir:
            rules_dir = settings.get('rules_directory')
        try:
            rule_name = options['process_results'].split('@')[0]
            job_name = options['process_results'].split('.')[1]
            rule = get_rules_by_name(rule_name, rules_dir, immediate=True)[0]
            job = InfernoJob(rule, settings)
            status, results = job.disco.results(job_name)
            if status == 'ready':
                if job.rule.rule_init_function:
                    job.rule.rule_init_function(job.params)
                rule.result_processor(rule.result_iterator(results), params=job.params, job_id=job_name)
        except Exception as e:
            import traceback
            trace = traceback.format_exc(15)
            log.error(trace)
            log.error("Error processing results for job: %s %s" % (options['process_results'], e))
            raise e
    elif options['process_map']:
        settings['no_purge'] = True
        rules_dir = options.get('rules_directory')
        if not rules_dir:
            rules_dir = settings.get('rules_directory')
        try:
            rule_name = options['process_map'].split('@')[0]
            job_name = options['process_map'].split('.')[1]
            rule = get_rules_by_name(rule_name, rules_dir, immediate=True)[0]
            rule.map_function = None
            rule.source_tags = []
            disco, ddfs = get_disco_handle(settings.get('server'))
            rule.source_urls = disco.mapresults(job_name)
            job = InfernoJob(rule, settings)
            if job.start():
                job.wait()
        except Exception as e:
            import traceback
            trace = traceback.format_exc(15)
            log.error(trace)
            log.error("Error processing map results for job: %s %s" % (options['process_map'], e))
            raise e
    elif options['immediate_rule']:
        # run inferno in 'immediate' mode
        settings['no_purge'] = True
        setproctitle('inferno - immediate.%s' % options['immediate_rule'])
        immed_rule = settings.get('immediate_rule')
        rules_dir = settings.get('rules_directory')
        rules = get_rules_by_name(immed_rule, rules_dir, immediate=True)
        try:
            for rule in rules:
                execute_rule(rule, settings)
        except Exception as e:
            import traceback
            trace = traceback.format_exc(15)
            log.error('Job failed: %s' % e.message)
            log.error(trace)
            exit(1)

    elif options['run_daemon']:
        # run inferno in 'daemon' mode
        from inferno.lib.daemon import InfernoDaemon
        setproctitle('inferno - master')
        InfernoDaemon(settings).start()

    else:
        # Display help when no options specified
        parser.print_help()
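
main() is the console entry point; a hypothetical invocation sketch (the real flag spellings live in _get_options, which is not shown here, so the option names below are assumptions):

# run one rule immediately and exit (maps to options['immediate_rule'])
main(['inferno', '--immediate-rule', 'some_rule_name'])

# run as the long-lived master (maps to options['run_daemon'])
main(['inferno', '--daemon'])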