Example #1
    def main(self):
        self.setUpUtilities()

        # Materialize the list of files to parse. It is better to do the
        # checks now, rather than potentially hours later when the
        # generator gets around to it, because there is a reasonable
        # chance log rotation will have kicked in and removed our oldest
        # files. Note that we still error if a file we want to parse
        # disappears before we get around to parsing it, which is
        # desirable behaviour.
        files_to_parse = list(
            get_files_to_parse(
                glob.glob(os.path.join(self.root, self.log_file_glob))))

        country_set = getUtility(ICountrySet)
        parsed_lines = 0
        max_parsed_lines = getattr(config.launchpad,
                                   'logparser_max_parsed_lines', None)
        max_is_set = max_parsed_lines is not None
        for fd, position in files_to_parse:
            # If we've used up our budget of lines to process, stop.
            if max_is_set and parsed_lines >= max_parsed_lines:
                break
            downloads, parsed_bytes, parsed_lines = parse_file(
                fd, position, self.logger, self.getDownloadKey)
            # Use a while loop here because we want to pop items from the dict
            # in order to free some memory as we go along. This is a good
            # thing here because the downloads dict may get really huge.
            while downloads:
                file_id, daily_downloads = downloads.popitem()
                update_download_count = self.getDownloadCountUpdater(file_id)

                # The object couldn't be retrieved (maybe it was deleted).
                # Don't bother counting downloads for it.
                if update_download_count is None:
                    continue

                for day, country_downloads in daily_downloads.items():
                    for country_code, count in country_downloads.items():
                        try:
                            country = country_set[country_code]
                        except NotFoundError:
                            # We don't know the country for the IP address
                            # where this request originated.
                            country = None
                        update_download_count(day, country, count)
            fd.seek(0)
            first_line = fd.readline()
            fd.close()
            create_or_update_parsedlog_entry(first_line, parsed_bytes)
            self.txn.commit()
            if safe_hasattr(fd, 'name'):
                name = fd.name
            else:
                name = fd
            self.logger.info('Finished parsing %s' % name)

        self.logger.info('Done parsing apache log files')
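
Two idioms in main() carry the weight here: materializing the generator so errors surface early, and draining the downloads dict with popitem() to release memory as we go. A minimal, self-contained sketch of both, using illustrative data rather than real Launchpad objects:

# Sketch only, with illustrative data in place of real Launchpad objects.

# 1) Materialize a lazy iterable up front so missing-file errors surface
#    now, not hours later when iteration finally reaches them.
def files(paths):
    for path in paths:
        yield path          # real code would open() here and could fail

files_to_parse = list(files(['access.log', 'access.log.1']))

# 2) Drain a potentially huge dict with popitem() so each entry can be
#    freed as soon as it is processed, instead of keeping the whole dict
#    alive until the loop ends.
downloads = {
    1: {'2008-06-13': {'US': 3}},
    2: {'2008-06-13': {'AU': 1}},
}
while downloads:
    file_id, daily_downloads = downloads.popitem()
    print(file_id, daily_downloads)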
Example #2
    def test_creation_of_new_entries(self):
        # When given a first_line that doesn't exist in the ParsedApacheLog
        # table, create_or_update_parsedlog_entry() will create a new entry
        # with the given number of bytes read.
        first_line = u'First line'
        create_or_update_parsedlog_entry(first_line,
                                         parsed_bytes=len(first_line))

        entry = IStore(ParsedApacheLog).find(ParsedApacheLog,
                                             first_line=first_line).one()
        self.assertIsNot(None, entry)
        self.assertEqual(entry.bytes_read, len(first_line))
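
One thing worth noting about first_line: in main() above, the script re-reads the first line of each log and hands it to create_or_update_parsedlog_entry(), presumably because rotation renames files, so a path is not a stable identity but the first logged request is. A small illustration, with io.StringIO standing in for a real log file:

import io

# Illustration: the first line is a stable identity for a log file even
# after rotation renames it; 'log_file' is a stand-in, not Launchpad code.
log_file = io.StringIO(
    '69.233.136.42 - - [13/Jun/2008:18:38:57 +0100] "GET / HTTP/1.1" 200\n'
    'more requests...\n')
first_line = log_file.readline()
# Re-opening the same (renamed) file later and reading its first line
# reproduces the same key, so parsing can resume at the stored bytes_read.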
    def test_update_of_existing_entries(self):
        # When given a first_line that already exists in the ParsedApacheLog
        # table, create_or_update_parsedlog_entry() will update that entry
        # with the given number of bytes read.
        first_line = u'First line'
        create_or_update_parsedlog_entry(first_line, parsed_bytes=2)
        store = IStore(ParsedApacheLog)
        entry = store.find(ParsedApacheLog, first_line=first_line).one()

        # Here we see that the new entry was created.
        self.assertIsNot(None, entry)
        self.assertEqual(entry.bytes_read, 2)

        create_or_update_parsedlog_entry(first_line,
                                         parsed_bytes=len(first_line))

        # And here we see that the same entry was updated by the second
        # call to create_or_update_parsedlog_entry().
        entry2 = store.find(ParsedApacheLog, first_line=first_line).one()
        self.assertIs(entry, entry2)
        self.assertIsNot(None, entry2)
        self.assertEqual(entry2.bytes_read, len(first_line))
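
Taken together, the two tests pin down an upsert contract keyed on first_line. A minimal sketch of that contract, assuming a Storm-like store API; this is not the real Launchpad implementation, and the ParsedApacheLog constructor signature here is an assumption:

# NOT the real implementation: just the upsert semantics the two tests
# above assert. Store API and constructor signature are assumptions.
def create_or_update_parsedlog_entry_sketch(first_line, parsed_bytes):
    store = IStore(ParsedApacheLog)
    entry = store.find(ParsedApacheLog, first_line=first_line).one()
    if entry is None:
        # First time this log file is seen: create a new entry.
        store.add(ParsedApacheLog(first_line, parsed_bytes))
    else:
        # Seen before: only advance the number of bytes read.
        entry.bytes_read = parsed_bytes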