def __init__(self, filename=None, content=NEWWORKBOOK):
    """Set up empty record collections and optionally load a workbook.

    filename -- path handed to ``self.read`` (may be None).
    content  -- raw workbook data handed to ``self.read``; defaults to
                the module-level ``NEWWORKBOOK``.

    ``self.read`` is only invoked when at least one of the two arguments
    is truthy.
    """
    # Extra streams of the container file, keyed by name.
    self.streams = {}

    # Typed collections for the workbook-global records.
    self.sheets = RecordList(BoundSheet)
    self.fonts = RecordListRW(FontRecord)
    self.numberformats = RecordList(NumberFormat)
    self.extendedformats = RecordListRW(ExtendedFormat)
    self.staticstrings = StaticStrings()
    self.names = RecordList(NameRecord)
    self.supbooks = RecordList(SupBookRecord)

    # Nothing to parse: leave the workbook empty.
    if not (filename or content):
        return
    self.read(filename, content)
def __init__(self, filename=None, content=NEWWORKBOOK):
    """Create the empty record containers, then parse input if any.

    filename -- optional path forwarded to ``self.read``.
    content  -- optional raw workbook data forwarded to ``self.read``
                (defaults to the module-level ``NEWWORKBOOK``).

    When both arguments are falsy no parsing takes place.
    """
    # Non-workbook streams of the container file, keyed by name.
    self.streams = {}

    # One collection per kind of workbook-global record.
    self.sheets = RecordList(BoundSheet)
    self.fonts = RecordListRW(FontRecord)
    self.numberformats = RecordList(NumberFormat)
    self.extendedformats = RecordListRW(ExtendedFormat)
    self.staticstrings = StaticStrings()
    self.names = RecordList(NameRecord)
    self.supbooks = RecordList(SupBookRecord)

    has_input = filename or content
    if has_input:
        self.read(filename, content)
class HSSFWorkbook(RecordContainer):
    """Workbook-level view of an .xls file.

    ``read`` parses the workbook stream into a linked chain of records
    (``self.records`` -> ... -> ``self.last_record``) plus typed
    collections (sheets, fonts, formats, strings, names, supbooks);
    ``getdata``/``write`` serialize them back.
    """

    MAX_ROW = 0xFFFF
    MAX_COLUMN = 0x00FF

    # The maximum number of cell styles in a .xls workbook.
    # The 'official' limit is 4,000, but POI allows a slightly larger number.
    # This extra delta takes into account built-in styles that are automatically
    # created for new workbooks
    #
    # See http://office.microsoft.com/en-us/excel-help/excel-specifications-and-limits-HP005199291.aspx
    MAX_STYLES = 4030

    def __init__(self, filename=None, content=NEWWORKBOOK):
        """Create empty record collections and load a workbook if given.

        filename -- optional path of a container file passed to ``read``.
        content  -- raw workbook stream data passed to ``read``; defaults
                    to the module-level ``NEWWORKBOOK``.
        """
        self.streams = {}
        self.sheets = RecordList(BoundSheet)
        self.fonts = RecordListRW(FontRecord)
        self.numberformats = RecordList(NumberFormat)
        self.extendedformats = RecordListRW(ExtendedFormat)
        self.staticstrings = StaticStrings()
        self.names = RecordList(NameRecord)
        self.supbooks = RecordList(SupBookRecord)
        if filename or content:
            self.read(filename, content)

    def write(self, filename):
        """Write the workbook to ``filename`` through a ``CFBWriter``.

        Every stream kept in ``self.streams`` is stored in sorted key
        order, followed by the serialized workbook under ``('Workbook',)``.
        """
        cfb = CFBWriter()
        for name in sorted(self.streams.iterkeys()):
            cfb.put(name, self.streams[name])
        cfb.put(('Workbook',), self.getdata())
        cfb.write(filename)

    def getdata(self):
        """Serialize the workbook stream and return it as one string.

        Layout of the result: the chained global records, the string
        table, an EOF record, then the data of every sheet.  As a side
        effect each sheet's ``position_of_BOF`` is set to the offset of
        its data within the returned string.
        """
        # Sheets re-serialized below repopulate the string table, so start
        # from fresh "new" containers and swap them in afterwards.
        self.staticstrings.newstrings = []
        self.staticstrings.newstring_map = {}
        for sheet in self.sheets:
            if sheet.sheet:
                sheet.sheetdata = sheet.sheet.getdata()
        self.staticstrings.strings = self.staticstrings.newstrings
        del self.staticstrings.newstring_map

        # No sheet record (0x0085) was seen while reading: append the
        # sheet list to the end of the record chain so it gets written.
        if 0x0085 not in self.urecord:
            self.last_record.next = self.sheets

        result = []
        first = self.records.next
        sheetpos = -1
        while first:
            # Remember where the sheet list lands; it is serialized again
            # once the sheet offsets are known.
            if sheetpos == -1 and first is self.sheets:
                sheetpos = len(result)
            result.append(first.get_data(self))
            first = first.next

        reslen = sum(map(len, result))
        result.append(self.staticstrings.getdata(self, reslen))
        result.append(struct.pack('<HH', 0x000A, 0))  # EOF
        # Account for the string table plus the 4-byte EOF record.
        reslen += len(result[-2]) + 4

        for sheet in self.sheets:
            sheet.position_of_BOF = reslen
            result.append(sheet.sheetdata)
            reslen += len(sheet.sheetdata)

        # Second pass over the sheet list, now with final offsets.
        # NOTE(review): relies on this having the same length as the first
        # serialization, otherwise the offsets above are off - confirm.
        result[sheetpos] = self.sheets.get_data(self)
        return ''.join(result)

    def read(self, filename, content):
        """Parse a workbook from a container file or from raw data.

        filename -- path of the container file; when truthy the workbook
                    stream found in it replaces ``content``.
        content  -- raw workbook stream data, used when ``filename`` is
                    falsy.

        Raises IOError when the file holds no workbook stream.
        """
        if filename:
            # The container is a binary file: text mode would translate
            # line endings on some platforms and corrupt the data.
            # NOTE(review): the handle is deliberately left open because
            # CFBReader may read stream data lazily - confirm before
            # closing it here.
            filehandle = open(filename, 'rb')
            cfb = CFBReader(filehandle)
            self.streams = cfb.dirtree
            del cfb
            # Normally, the Workbook will be in a POIFS Stream
            # called "Workbook". However, some XLS generators use "WORKBOOK"
            workbook = None
            for wb in ('Book', 'BOOK', 'WORKBOOK', 'Workbook'):
                if (wb,) in self.streams:
                    # Popped so write() does not store it a second time.
                    workbook = self.streams.pop((wb,))
            if not workbook:
                raise IOError('The file does not contain a Workbook-entry')
            content = workbook.data

        # Record id -> loader; ids without an entry become plain Records.
        loaders = {
            0x0018: self.names.read,
            0x0031: self.fonts.read,
            0x0059: self.read_xct,
            0x005A: self.read_crn,
            0x0085: self.sheets.read,
            0x00e0: self.extendedformats.read,
            0x00fc: self.staticstrings.read,
            0x00ff: Record.ignore,
            0x01ae: self.supbooks.read,
            0x041E: self.numberformats.read,
        }
        urecord = {}  # first chained record seen for each record id
        self.records = Record(0, 0)  # dummy head of the record chain
        last_record = self.records
        for sid, data in record_stream(content):
            if sid == 0x000A:  # EOF
                break
            new_record = loaders.get(sid, Record)(sid, data)
            # Loaders may return a falsy value; such records are not
            # linked into the chain.
            if new_record:
                last_record.next = new_record
                last_record = new_record
                if sid not in urecord:
                    urecord[sid] = new_record
        self.urecord = urecord
        self.last_record = last_record

        # Slice the sheet data out of the stream back to front: each
        # sheet runs from its own offset up to the start of the next one.
        pos = len(content)
        for sheet in sorted(self.sheets, key=lambda s: s.position_of_BOF,
                            reverse=True):
            sheet.sheetdata = content[sheet.position_of_BOF:pos]
            pos = sheet.position_of_BOF

        # Standard formats first, then the ones defined by the workbook.
        self.numberformats_map = dict(std_format_strings)
        self.numberformats_map.update(
            (nf.index, nf.format) for nf in self.numberformats)

    def read_xct(self, sid, data):
        """Loader for record 0x0059: wrap a sheet of the latest supbook.

        Reads a signed count and an unsigned sheet index at offset 4 of
        ``data`` and replaces that sheet entry of the last supbook with a
        ``SupBookSheet``, which then collects the following 0x005A records.
        Returns None, so the record is not added to the record chain.
        """
        # Two padding bytes keep the unpack from failing on short data;
        # the index then reads as 0.
        cnt, itab = struct.unpack_from('<hH', data + '\0\0', 4)
        supbook = self.supbooks[-1]
        sheet = SupBookSheet(supbook.sheets[itab], cnt)
        supbook.sheets[itab] = sheet
        self.supbooksheet = sheet

    def read_crn(self, sid, data):
        """Loader for record 0x005A: append ``data`` to the current
        supbook sheet set by ``read_xct``.  Returns None."""
        self.supbooksheet.append(data)

    def add_extformat(self, xf):
        """Return ``xf.put_record(self)`` for an extended format.

        ``xf`` is either an object with a ``put_record`` method or a key
        into ``self.extendedformats``.
        """
        if not hasattr(xf, 'put_record'):
            xf = self.extendedformats[xf]
        return xf.put_record(self)
class HSSFWorkbook(RecordContainer):
    """Workbook-level view of an .xls file.

    ``read`` parses the workbook stream into a linked chain of records
    (``self.records`` -> ... -> ``self.last_record``) plus typed
    collections (sheets, fonts, formats, strings, names, supbooks);
    ``getdata``/``write`` serialize them back.
    """

    MAX_ROW = 0xFFFF
    MAX_COLUMN = 0x00FF

    # The maximum number of cell styles in a .xls workbook.
    # The 'official' limit is 4,000, but POI allows a slightly larger number.
    # This extra delta takes into account built-in styles that are automatically
    # created for new workbooks
    #
    # See http://office.microsoft.com/en-us/excel-help/excel-specifications-and-limits-HP005199291.aspx
    MAX_STYLES = 4030

    def __init__(self, filename=None, content=NEWWORKBOOK):
        """Create empty record collections and load a workbook if given.

        filename -- optional path of a container file passed to ``read``.
        content  -- raw workbook stream data passed to ``read``; defaults
                    to the module-level ``NEWWORKBOOK``.
        """
        self.streams = {}
        self.sheets = RecordList(BoundSheet)
        self.fonts = RecordListRW(FontRecord)
        self.numberformats = RecordList(NumberFormat)
        self.extendedformats = RecordListRW(ExtendedFormat)
        self.staticstrings = StaticStrings()
        self.names = RecordList(NameRecord)
        self.supbooks = RecordList(SupBookRecord)
        if filename or content:
            self.read(filename, content)

    def write(self, filename):
        """Write the workbook to ``filename`` through a ``CFBWriter``.

        Every stream kept in ``self.streams`` is stored in sorted key
        order, followed by the serialized workbook under ``('Workbook',)``.
        """
        cfb = CFBWriter()
        for name in sorted(self.streams.iterkeys()):
            cfb.put(name, self.streams[name])
        cfb.put(('Workbook', ), self.getdata())
        cfb.write(filename)

    def getdata(self):
        """Serialize the workbook stream and return it as one string.

        Layout of the result: the chained global records, the string
        table, an EOF record, then the data of every sheet.  As a side
        effect each sheet's ``position_of_BOF`` is set to the offset of
        its data within the returned string.
        """
        # Sheets re-serialized below repopulate the string table, so start
        # from fresh "new" containers and swap them in afterwards.
        self.staticstrings.newstrings = []
        self.staticstrings.newstring_map = {}
        for sheet in self.sheets:
            if sheet.sheet:
                sheet.sheetdata = sheet.sheet.getdata()
        self.staticstrings.strings = self.staticstrings.newstrings
        del self.staticstrings.newstring_map

        # No sheet record (0x0085) was seen while reading: append the
        # sheet list to the end of the record chain so it gets written.
        if 0x0085 not in self.urecord:
            self.last_record.next = self.sheets

        result = []
        first = self.records.next
        sheetpos = -1
        while first:
            # Remember where the sheet list lands; it is serialized again
            # once the sheet offsets are known.
            if sheetpos == -1 and first is self.sheets:
                sheetpos = len(result)
            result.append(first.get_data(self))
            first = first.next

        reslen = sum(map(len, result))
        result.append(self.staticstrings.getdata(self, reslen))
        result.append(struct.pack('<HH', 0x000A, 0))  # EOF
        # Account for the string table plus the 4-byte EOF record.
        reslen += len(result[-2]) + 4

        for sheet in self.sheets:
            sheet.position_of_BOF = reslen
            result.append(sheet.sheetdata)
            reslen += len(sheet.sheetdata)

        # Second pass over the sheet list, now with final offsets.
        # NOTE(review): relies on this having the same length as the first
        # serialization, otherwise the offsets above are off - confirm.
        result[sheetpos] = self.sheets.get_data(self)
        return ''.join(result)

    def read(self, filename, content):
        """Parse a workbook from a container file or from raw data.

        filename -- path of the container file; when truthy the workbook
                    stream found in it replaces ``content``.
        content  -- raw workbook stream data, used when ``filename`` is
                    falsy.

        Raises IOError when the file holds no workbook stream.
        """
        if filename:
            # The container is a binary file: text mode would translate
            # line endings on some platforms and corrupt the data.
            # NOTE(review): the handle is deliberately left open because
            # CFBReader may read stream data lazily - confirm before
            # closing it here.
            filehandle = open(filename, 'rb')
            cfb = CFBReader(filehandle)
            self.streams = cfb.dirtree
            del cfb
            # Normally, the Workbook will be in a POIFS Stream
            # called "Workbook". However, some XLS generators use "WORKBOOK"
            workbook = None
            for wb in ('Book', 'BOOK', 'WORKBOOK', 'Workbook'):
                if (wb, ) in self.streams:
                    # Popped so write() does not store it a second time.
                    workbook = self.streams.pop((wb, ))
            if not workbook:
                raise IOError('The file does not contain a Workbook-entry')
            content = workbook.data

        # Record id -> loader; ids without an entry become plain Records.
        loaders = {
            0x0018: self.names.read,
            0x0031: self.fonts.read,
            0x0059: self.read_xct,
            0x005A: self.read_crn,
            0x0085: self.sheets.read,
            0x00e0: self.extendedformats.read,
            0x00fc: self.staticstrings.read,
            0x00ff: Record.ignore,
            0x01ae: self.supbooks.read,
            0x041E: self.numberformats.read,
        }
        urecord = {}  # first chained record seen for each record id
        self.records = Record(0, 0)  # dummy head of the record chain
        last_record = self.records
        for sid, data in record_stream(content):
            if sid == 0x000A:  # EOF
                break
            new_record = loaders.get(sid, Record)(sid, data)
            # Loaders may return a falsy value; such records are not
            # linked into the chain.
            if new_record:
                last_record.next = new_record
                last_record = new_record
                if sid not in urecord:
                    urecord[sid] = new_record
        self.urecord = urecord
        self.last_record = last_record

        # Slice the sheet data out of the stream back to front: each
        # sheet runs from its own offset up to the start of the next one.
        pos = len(content)
        for sheet in sorted(self.sheets,
                            key=lambda s: s.position_of_BOF,
                            reverse=True):
            sheet.sheetdata = content[sheet.position_of_BOF:pos]
            pos = sheet.position_of_BOF

        # Standard formats first, then the ones defined by the workbook.
        self.numberformats_map = dict(std_format_strings)
        self.numberformats_map.update(
            (nf.index, nf.format) for nf in self.numberformats)

    def read_xct(self, sid, data):
        """Loader for record 0x0059: wrap a sheet of the latest supbook.

        Reads a signed count and an unsigned sheet index at offset 4 of
        ``data`` and replaces that sheet entry of the last supbook with a
        ``SupBookSheet``, which then collects the following 0x005A records.
        Returns None, so the record is not added to the record chain.
        """
        # Two padding bytes keep the unpack from failing on short data;
        # the index then reads as 0.
        cnt, itab = struct.unpack_from('<hH', data + '\0\0', 4)
        supbook = self.supbooks[-1]
        sheet = SupBookSheet(supbook.sheets[itab], cnt)
        supbook.sheets[itab] = sheet
        self.supbooksheet = sheet

    def read_crn(self, sid, data):
        """Loader for record 0x005A: append ``data`` to the current
        supbook sheet set by ``read_xct``.  Returns None."""
        self.supbooksheet.append(data)

    def add_extformat(self, xf):
        """Return ``xf.put_record(self)`` for an extended format.

        ``xf`` is either an object with a ``put_record`` method or a key
        into ``self.extendedformats``.
        """
        if not hasattr(xf, 'put_record'):
            xf = self.extendedformats[xf]
        return xf.put_record(self)