def test_completely_parsed_file(self):
        """A file that has been completely parsed will be skipped."""
        # Read the first line and the total length inside a ``with`` block
        # so the file handle is closed deterministically.
        with open(self.file_path) as fd:
            first_line = fd.readline()
            fd.seek(0)
            total_bytes = len(fd.read())
        # Record the file (identified by its first line) with every byte
        # marked as already read.
        ParsedApacheLog(first_line, total_bytes)

        files_to_parse = get_files_to_parse([self.file_path])
        self.assertEqual(list(files_to_parse), [])
 def test_resumed_gzipped_file(self):
     """A partially parsed gzipped log is resumed where parsing stopped.

     In subsequent runs of the script we resume from where we stopped last
     time.  Here we pretend that only the first line was parsed.
     """
     gz_name = 'launchpadlibrarian.net.access-log.1.gz'
     gz_path = os.path.join(self.root, gz_name)
     # try/finally rather than ``with``: GzipFile only supports the context
     # manager protocol from Python 2.7 onwards.
     gz_file = gzip.open(gz_path)
     try:
         first_line = gz_file.readline()
     finally:
         gz_file.close()
     ParsedApacheLog(first_line, len(first_line))
     files_to_parse = get_files_to_parse([gz_path])
     # Materialise the positions so the comparison is list-to-list even
     # where map() returns an iterator instead of a list.
     positions = list(map(itemgetter(1), files_to_parse))
     self.assertEqual(positions, [len(first_line)])
Beispiel #3
0
def create_or_update_parsedlog_entry(first_line, parsed_bytes):
    """Record how many bytes were parsed for the log starting with first_line.

    Looks up the ParsedApacheLog whose first_line matches the given one.
    If there is none, a new ParsedApacheLog is created with parsed_bytes;
    otherwise the existing entry gets its bytes_read set to parsed_bytes
    and its date_last_parsed set to the current UTC time.
    """
    line_key = unicode(first_line)
    store = IStore(ParsedApacheLog)
    entry = store.find(ParsedApacheLog, first_line=line_key).one()
    if entry is not None:
        # Already known: just advance the bookkeeping on the existing row.
        entry.bytes_read = parsed_bytes
        entry.date_last_parsed = datetime.now(pytz.UTC)
    else:
        ParsedApacheLog(line_key, parsed_bytes)
    def test_parsed_file_with_new_content(self):
        """A parsed file with new content is resumed from the old position.

        A file that has been parsed already but in which new content was
        added will be parsed again, starting from where parsing stopped last
        time.
        """
        # Use a context manager so the handle used to peek at the first
        # line is closed before the file is handed to get_files_to_parse.
        with open(self.file_path) as log_file:
            first_line = log_file.readline()
        ParsedApacheLog(first_line, len(first_line))

        files_to_parse = list(get_files_to_parse([self.file_path]))
        self.assertEqual(len(files_to_parse), 1)
        fd, position = files_to_parse[0]
        # Since we parsed the first line above, we'll be told to start where
        # the first line ends.
        self.assertEqual(position, len(first_line))
    def test_different_files_with_same_name(self):
        """A file whose first line differs from any parsed one starts at 0.

        Thanks to log rotation, two runs of our script may see files with
        the same name but completely different content.  If we see a file
        with a name matching that of an already parsed file but with content
        differing from the last file with that name parsed, we know we need
        to parse the file from the start.
        """
        ParsedApacheLog('First line', bytes_read=1000)

        # This file's first line is different from the one recorded above,
        # so we'll have to parse it from the start.
        content2 = 'Different First Line\nSecond Line'
        # NamedTemporaryFile(delete=False) creates the file and returns a
        # single file object for it; closing that object leaves no raw OS
        # descriptor open, unlike discarding the descriptor that mkstemp()
        # returns.  The file is kept on disk after the ``with`` block.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as new_file:
            new_file.write(content2)
            new_path = new_file.name
        files_to_parse = get_files_to_parse([new_path])
        # Materialise the positions so the comparison is list-to-list even
        # where map() returns an iterator instead of a list.
        positions = list(map(itemgetter(1), files_to_parse))
        self.assertEqual(positions, [0])