import struct
import sys
import zlib

import plog_pb2


def plogiter(fn):
    """Yield each LogEntry message in the pandalog at fn."""
    with open(fn, 'rb') as f:
        # file header: version, (pad), directory offset, (pad), chunk size
        version, _, dir_pos, _, chunk_size = struct.unpack('<IIQII', f.read(24))
        f.seek(dir_pos)
        num_chunks = struct.unpack('<I', f.read(4))[0]
        if num_chunks == 0:
            return
        # chunk directory: (start instruction, file offset, entry count) triples
        entries = []
        for i in range(num_chunks):
            buf = f.read(24)
            entries.append(struct.unpack('<QQQ', buf))
        # append a sentinel at the directory offset so the last real chunk
        # has a well-defined end position
        if entries[-1][1] != dir_pos:
            entries.append((0, dir_pos, 0))
        for entry, next_entry in zip(entries, entries[1:]):
            start_instr, start_pos, num_entries = entry
            next_pos = next_entry[1]
            f.seek(start_pos)
            zsize = next_pos - start_pos
            zdata = f.read(zsize)
            data = zlib.decompress(zdata, 15, chunk_size)
            i = 0
            while i < len(data):
                # each entry is a little-endian u32 size followed by a
                # serialized LogEntry of that many bytes
                entry_size = struct.unpack('<I', data[i:i + 4])[0]
                i += 4
                entry_data = data[i:i + entry_size]
                message = plog_pb2.LogEntry()
                message.ParseFromString(entry_data)
                yield message
                i += entry_size
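# A minimal usage sketch for plogiter(); reading the log path from
# sys.argv[1] and printing the protobuf text form of each entry are
# illustrative assumptions, not part of the function above.
if __name__ == '__main__':
    for msg in plogiter(sys.argv[1]):
        print(msg)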
def __next__(self):
    # ran out of chunks
    if not self.chunk_idx < self.nchunks:
        raise StopIteration
    if self.chunk_data is None:
        # unpack (instr, pos, nentries) for this and the next chunk
        cur = struct.unpack_from('<QQQ', self.chunks, 24 * self.chunk_idx)
        if self.chunk_idx + 1 < self.nchunks:
            nxt = struct.unpack_from('<QQQ', self.chunks, 24 * (self.chunk_idx + 1))
            zchunk_size = nxt[1] - cur[1]
        else:
            # a compressed chunk size of -1 makes read() consume the
            # rest of the file
            zchunk_size = -1
        # read and decompress chunk data
        self.f.seek(cur[1])
        self.chunk_data = zlib.decompress(self.f.read(zchunk_size), 15, self.chunk_gsize)
        self.chunk_size = len(self.chunk_data)
        self.chunk_data_idx = 0

    # parse the next message into a fresh LogEntry; on a fresh message,
    # MergeFromString() is slightly faster than ParseFromString()
    msg_size, = struct.unpack_from('<I', self.chunk_data, self.chunk_data_idx)
    msg = plog_pb2.LogEntry()
    msg_start = self.chunk_data_idx + 4
    msg_end = msg_start + msg_size
    msg.MergeFromString(self.chunk_data[msg_start:msg_end])

    # update state; advance to the next chunk once this one is exhausted
    self.chunk_data_idx = msg_end
    if not self.chunk_data_idx < self.chunk_size:
        self.chunk_idx += 1
        self.chunk_size = 0
        self.chunk_data = None
        self.chunk_data_idx = 0
    return msg
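# The __next__() above assumes an enclosing iterator class that opens the
# log, reads the header, and caches the raw chunk directory. A minimal
# sketch of that class follows; the name PLogReader and the exact header
# handling are assumptions inferred from the state __next__() touches.
class PLogReader:
    def __init__(self, fn):
        self.f = open(fn, 'rb')
        # header: version, (pad), directory offset, (pad), chunk size
        _, _, dir_pos, _, self.chunk_gsize = struct.unpack('<IIQII', self.f.read(24))
        # chunk directory: count, then raw (instr, pos, nentries) triples
        self.f.seek(dir_pos)
        self.nchunks, = struct.unpack('<I', self.f.read(4))
        self.chunks = self.f.read(24 * self.nchunks)
        # per-chunk decompression state consumed by __next__()
        self.chunk_idx = 0
        self.chunk_data = None
        self.chunk_size = 0
        self.chunk_data_idx = 0

    def __iter__(self):
        return self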
def read(plog):
    """Generator over the LogEntry messages in a pandalog file."""
    with open(plog, 'rb') as f:
        version, _, dir_pos, _, chunk_size = struct.unpack('<IIQII', f.read(24))
        f.seek(dir_pos)
        num_chunks = struct.unpack('<I', f.read(4))[0]
        if num_chunks == 0:
            raise ValueError("Not a Pandalog")
        entries = []
        for i in range(num_chunks):
            buf = f.read(24)
            entries.append(struct.unpack('<QQQ', buf))
        # sentinel at the directory offset gives the last chunk an end position
        if entries[-1][1] != dir_pos:
            entries.append((0, dir_pos, 0))
        for entry, next_entry in zip(entries, entries[1:]):
            start_instr, start_pos, num_entries = entry
            next_pos = next_entry[1]
            f.seek(start_pos)
            zsize = next_pos - start_pos
            zdata = f.read(zsize)
            data = zlib.decompress(zdata, 15, chunk_size)
            i = 0
            while i < len(data):
                entry_size = struct.unpack('<I', data[i:i + 4])[0]
                i += 4
                entry_data = data[i:i + entry_size]
                message = plog_pb2.LogEntry()
                message.ParseFromString(entry_data)
                yield message
                i += entry_size
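# A short usage sketch for read(); 'example.plog' is a placeholder path.
count = 0
for msg in read('example.plog'):
    count += 1
print(count, 'entries')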
from google.protobuf.json_format import MessageToJson

# standalone variant: dump an entire pandalog to stdout as a JSON array;
# the header/directory parsing mirrors plogiter() and read() above
f = open(sys.argv[1], 'rb')
version, _, dir_pos, _, chunk_size = struct.unpack('<IIQII', f.read(24))
f.seek(dir_pos)
num_chunks = struct.unpack('<I', f.read(4))[0]

entries = []
for i in range(num_chunks):
    buf = f.read(24)
    entries.append(struct.unpack('<QQQ', buf))
# sentinel at the directory offset gives the last chunk an end position
if entries[-1][1] != dir_pos:
    entries.append((0, dir_pos, 0))

print("[")
first = True
for entry, next_entry in zip(entries, entries[1:]):
    start_instr, start_pos, num_entries = entry
    next_pos = next_entry[1]
    f.seek(start_pos)
    zsize = next_pos - start_pos
    zdata = f.read(zsize)
    data = zlib.decompress(zdata, 15, chunk_size)
    i = 0
    while i < len(data):
        # separate entries with commas across chunk boundaries too,
        # so the output stays valid JSON
        if not first:
            print(",")
        first = False
        entry_size = struct.unpack('<I', data[i:i + 4])[0]
        i += 4
        entry_data = data[i:i + entry_size]
        message = plog_pb2.LogEntry()
        message.ParseFromString(entry_data)
        print(MessageToJson(message))
        i += entry_size
print("]")
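# Example invocation (assumed script name; the snippet above reads the
# plog path from sys.argv[1] and writes a JSON array to stdout):
#
#   python plog_to_json.py recording.plog > recording.json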