def test_base_scraper(self):
        '''Exercise the plain LogScraper from configuration to output.

        The scraper is pointed at LOG_DIR/LOG_FILE and given one regex
        without groups and one with named groups.  The test then checks
        the aggregated hit data, the min/max/avg statistics computed for
        the 'key' group, and the text written by view_regex_hits().
        '''
        scraper = LogScraper()

        # Tell the scraper which file(s) to read
        scraper.set_user_params({LSC.FILENAME: os.path.join(LOG_DIR, LOG_FILE)})

        # Register the two patterns under test
        scraper.add_regex(name='name_is_judge',
                          pattern=r'My name is Judge\.$')
        scraper.add_regex(name='key_value_regex',
                          pattern=r'The (?P<key>\w+) is (?P<value>\w+)\.$')

        # Scrape the logs
        log_data = scraper.get_log_data()

        # Expected per-file hits: log1.log only matches the group-less regex
        log1_hits = {
            'filename': './logs/log1.log',
            'regexes': {
                'name_is_judge': {'total_hits': 2, 'group_hits': {}},
                'key_value_regex': {
                    'total_hits': 0,
                    'group_hits': {'key': OrderedDict(),
                                   'value': OrderedDict()},
                },
            },
        }
        # log2.log matches both regexes
        log2_hits = {
            'filename': './logs/log2.log',
            'regexes': {
                'name_is_judge': {'total_hits': 1, 'group_hits': {}},
                'key_value_regex': {
                    'total_hits': 4,
                    'group_hits': {
                        'key': OrderedDict([('time', 1), ('weather', 3)]),
                        'value': OrderedDict([('icy', 1), ('noon', 1),
                                              ('rainy', 1), ('sunny', 1)]),
                    },
                },
            },
        }
        # Expected totals across both files
        combined_hits = {
            'name_is_judge': {'total_hits': 3, 'group_hits': {}},
            'key_value_regex': {
                'total_hits': 4,
                'group_hits': {
                    'key': OrderedDict([('time', 1), ('weather', 3)]),
                    'value': OrderedDict([('icy', 1), ('noon', 1),
                                          ('rainy', 1), ('sunny', 1)]),
                },
            },
        }

        # Show the complete diff if the nested dicts differ
        self.maxDiff = None
        self.assertDictEqual(log_data,
                             {'regexes': combined_hits,
                              'file_hits': [log1_hits, log2_hits]})

        # Min/max/avg over the counts collected for the 'key' group
        key_counts = log_data['regexes']['key_value_regex'][LSC.GROUP_HITS]['key']
        key_stats = scraper._calc_stats(key_counts)
        self.assertDictEqual(key_stats,
                             {'min_key': 'time',
                              'min_count': 1,
                              'max_key': 'weather',
                              'max_count': 3,
                              'avg_count': 2.0})

        # view_regex_hits() should write every matching line, one per row
        buf = StringIO()
        scraper.view_regex_hits(out=buf)
        hit_lines = ['My name is Judge.',
                     'My name is Judge.',
                     'The weather is sunny.',
                     'The time is noon.',
                     'My name is Judge.',
                     'The weather is rainy.',
                     'The weather is icy.']
        self.assertEqual(buf.getvalue(),
                         ''.join(line + '\n' for line in hit_lines))
Example #2
0
    def test_base_scraper(self):
        '''Test the no-nonsense simple scraper.

        Checks, in order: the repr() and str() of a freshly created
        LogScraper, that validating the user params and asking for the
        archived file path are harmless here, the aggregated hit data
        returned by get_log_data(), the statistics from _calc_stats()
        (for empty input and for the 'key' group), and the per-file
        matches returned by get_regex_matches().
        '''

        _log_scraper = LogScraper()
        expected = (
            "LogScraper(default_filename=, default_filepath=, "
            "optional_params={'levels_to_boxes': {}, 'filename_regex': '', "
            "'processor_count': 4, 'local_copy_lifetime': 0, 'tmp_path': '', "
            "'force_copy': False, 'days_before_archiving': 0}, user_params={}")
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12)
        self.assertEqual(repr(_log_scraper), expected)

        expected = (
            "Regexes: []\n"
            "Default filename: \n"
            "Default filepath: \n"
            "Optional params: {'levels_to_boxes': {}, 'filename_regex': '', "
            "'processor_count': 4, 'local_copy_lifetime': 0, 'tmp_path': '', "
            "'force_copy': False, 'days_before_archiving': 0}\n"
            "User params: {}")
        self.assertEqual(str(_log_scraper), expected)

        # Set file list
        user_params = {}
        user_params[LSC.DEBUG] = True
        user_params[LSC.FILENAME] = os.path.join(LOG_DIR, LOG_FILE)
        _log_scraper.set_user_params(user_params)

        # Add some regexes: one without groups, one with named groups
        no_group_regex = r'My name is Judge\.$'
        group_regex = r'The (?P<key>\w+) is (?P<value>\w+)\.$'
        _log_scraper.add_regex(name='name_is_judge', pattern=no_group_regex)
        _log_scraper.add_regex(name='key_value_regex', pattern=group_regex)

        # Validate user params (should do nothing)
        _log_scraper._validate_user_params()

        # Should give back nothing
        self.assertIsNone(_log_scraper._get_archived_file_path())

        # Finally, get some data
        results = _log_scraper.get_log_data()

        # Totals across both files, followed by the per-file breakdown
        expected = {
            'regexes': {
                'key_value_regex': {
                    'group_hits': {
                        'key':
                        OrderedDict([('time', 1), ('weather', 3)]),
                        'value':
                        OrderedDict([('icy', 1), ('noon', 1), ('rainy', 1),
                                     ('sunny', 1)])
                    },
                    'total_hits': 4
                },
                'name_is_judge': {
                    'group_hits': {},
                    'total_hits': 3
                }
            },
            'file_hits': [{
                'regexes': {
                    'key_value_regex': {
                        'group_hits': {
                            'value': OrderedDict(),
                            'key': OrderedDict()
                        },
                        'total_hits': 0
                    },
                    'name_is_judge': {
                        'group_hits': {},
                        'total_hits': 2
                    }
                },
                'filename': './logs/log1.log'
            }, {
                'regexes': {
                    'key_value_regex': {
                        'group_hits': {
                            'value':
                            OrderedDict([('icy', 1), ('noon', 1), ('rainy', 1),
                                         ('sunny', 1)]),
                            'key':
                            OrderedDict([('time', 1), ('weather', 3)])
                        },
                        'total_hits': 4
                    },
                    'name_is_judge': {
                        'group_hits': {},
                        'total_hits': 1
                    }
                },
                'filename': './logs/log2.log'
            }]
        }

        # Show the complete diff if the nested dicts differ
        self.maxDiff = None
        self.assertDictEqual(results, expected)

        # Test the min/max/avg
        # Test with no data: every statistic is expected to be 0
        stats = _log_scraper._calc_stats([])
        expected = {
            'max_key': 0,
            'max_count': 0,
            'avg_count': 0,
            'min_count': 0,
            'min_key': 0
        }
        self.assertDictEqual(stats, expected)

        # Test with the counts collected for the 'key' group
        stats = _log_scraper._calc_stats(
            results['regexes']['key_value_regex'][LSC.GROUP_HITS]['key'])
        expected = {
            'max_key': 'weather',
            'max_count': 3,
            'avg_count': 2.0,
            'min_count': 1,
            'min_key': 'time'
        }
        self.assertDictEqual(stats, expected)

        # Test retrieving the regex matches: one entry per scraped file
        matches = _log_scraper.get_regex_matches()
        self.assertEqual(len(matches), 2)
        self.assertEqual(matches[0][LSC.FILENAME],
                         os.path.join(LOG_DIR, LOG_FILE_1[0]))
        self.assertEqual(
            len(matches[0][LSC.REGEXES]['key_value_regex'][LSC.MATCHES]), 0)
        self.assertEqual(
            len(matches[0][LSC.REGEXES]['name_is_judge'][LSC.MATCHES]), 2)
        self.assertEqual(matches[1][LSC.FILENAME],
                         os.path.join(LOG_DIR, LOG_FILE_2[0]))
        self.assertEqual(
            len(matches[1][LSC.REGEXES]['key_value_regex'][LSC.MATCHES]), 4)
        self.assertEqual(
            len(matches[1][LSC.REGEXES]['name_is_judge'][LSC.MATCHES]), 1)
    def test_base_scraper(self):
        '''Test the no-nonsense simple scraper.

        Covers the repr() and str() of a new LogScraper, the no-op
        validation and archived-path calls, the hit data returned by
        get_log_data(), the _calc_stats() results for empty input and
        for the 'key' group, and the per-file matches returned by
        get_regex_matches().
        '''

        _log_scraper = LogScraper()
        expected = ("LogScraper(default_filename=, default_filepath=, "
                    "optional_params={'levels_to_boxes': {}, 'filename_regex': '', "
                    "'processor_count': 4, 'local_copy_lifetime': 0, 'tmp_path': '', "
                    "'force_copy': False, 'days_before_archiving': 0}, user_params={}")
        # Use assertEqual: the assertEquals alias is deprecated and no
        # longer exists in Python 3.12+
        self.assertEqual(repr(_log_scraper), expected)

        expected = ("Regexes: []\n"
                    "Default filename: \n"
                    "Default filepath: \n"
                    "Optional params: {'levels_to_boxes': {}, 'filename_regex': '', "
                    "'processor_count': 4, 'local_copy_lifetime': 0, 'tmp_path': '', "
                    "'force_copy': False, 'days_before_archiving': 0}\n"
                    "User params: {}")
        self.assertEqual(str(_log_scraper), expected)

        # Set file list
        user_params = {}
        user_params[LSC.DEBUG] = True
        user_params[LSC.FILENAME] = os.path.join(LOG_DIR, LOG_FILE)
        _log_scraper.set_user_params(user_params)

        # Add some regexes: one without groups, one with named groups
        no_group_regex = r'My name is Judge\.$'
        group_regex = r'The (?P<key>\w+) is (?P<value>\w+)\.$'
        _log_scraper.add_regex(name='name_is_judge', pattern=no_group_regex)
        _log_scraper.add_regex(name='key_value_regex', pattern=group_regex)

        # Validate user params (should do nothing)
        _log_scraper._validate_user_params()

        # Should give back nothing
        self.assertIsNone(_log_scraper._get_archived_file_path())

        # Finally, get some data
        results = _log_scraper.get_log_data()

        # Totals across both files, followed by the per-file breakdown
        expected = {'regexes' : {'key_value_regex': {'group_hits': {'key': OrderedDict([('time', 1),
                                                                                       ('weather', 3)]),
                                                                    'value': OrderedDict([('icy', 1),
                                                                                          ('noon', 1),
                                                                                          ('rainy', 1),
                                                                                          ('sunny', 1)])},
                                                     'total_hits': 4},
                                 'name_is_judge': {'group_hits': {}, 'total_hits': 3}},
                    'file_hits': [{'regexes': {'key_value_regex': {'group_hits': {'value': OrderedDict(),
                                                                                  'key': OrderedDict()},
                                                                   'total_hits': 0},
                                               'name_is_judge': {'group_hits': {}, 'total_hits': 2}},
                                   'filename': './logs/log1.log'},
                                  {'regexes': {'key_value_regex': {'group_hits': {'value': OrderedDict([('icy', 1),
                                                                                                        ('noon', 1),
                                                                                                        ('rainy', 1),
                                                                                                        ('sunny', 1)]),
                                                                                  'key': OrderedDict([('time', 1),
                                                                                                      ('weather', 3)])},
                                                                   'total_hits': 4},
                                               'name_is_judge': {'group_hits': {},
                                                                 'total_hits': 1}},
                                   'filename': './logs/log2.log'}]}

        # Show the complete diff if the nested dicts differ
        self.maxDiff = None
        self.assertDictEqual(results, expected)

        # Test the min/max/avg
        # Test with no data: every statistic is expected to be 0
        stats = _log_scraper._calc_stats([])
        expected = {'max_key': 0,
                    'max_count': 0,
                    'avg_count': 0,
                    'min_count': 0,
                    'min_key': 0}
        self.assertDictEqual(stats, expected)

        # Test with the counts collected for the 'key' group
        stats = _log_scraper._calc_stats(results['regexes']['key_value_regex'][LSC.GROUP_HITS]['key'])
        expected = {'max_key': 'weather',
                    'max_count': 3,
                    'avg_count': 2.0,
                    'min_count': 1,
                    'min_key': 'time'}
        self.assertDictEqual(stats, expected)

        # Test retrieving the regex matches: one entry per scraped file
        matches = _log_scraper.get_regex_matches()
        self.assertEqual(len(matches), 2)
        self.assertEqual(matches[0][LSC.FILENAME], os.path.join(LOG_DIR, LOG_FILE_1[0]))
        self.assertEqual(len(matches[0][LSC.REGEXES]['key_value_regex'][LSC.MATCHES]), 0)
        self.assertEqual(len(matches[0][LSC.REGEXES]['name_is_judge'][LSC.MATCHES]), 2)
        self.assertEqual(matches[1][LSC.FILENAME], os.path.join(LOG_DIR, LOG_FILE_2[0]))
        self.assertEqual(len(matches[1][LSC.REGEXES]['key_value_regex'][LSC.MATCHES]), 4)
        self.assertEqual(len(matches[1][LSC.REGEXES]['name_is_judge'][LSC.MATCHES]), 1)