Example #1
0
File: excel.py Project: ruema/pypoi
 def __init__(self, filename=None, content=NEWWORKBOOK):
     """Create the empty record containers and load data when available.

     ``read(filename, content)`` is called only if at least one of the two
     arguments is truthy; otherwise the workbook stays empty.
     """
     # Plain dict of streams.
     self.streams = {}

     # One container per record type handled by this workbook.
     self.sheets = RecordList(BoundSheet)
     self.fonts = RecordListRW(FontRecord)
     self.numberformats = RecordList(NumberFormat)
     self.extendedformats = RecordListRW(ExtendedFormat)
     self.staticstrings = StaticStrings()
     self.names = RecordList(NameRecord)
     self.supbooks = RecordList(SupBookRecord)

     # Nothing to load: leave the freshly created containers untouched.
     if not filename and not content:
         return
     self.read(filename, content)
Example #2
0
 def __init__(self, filename=None, content=NEWWORKBOOK):
     """Initialise empty containers, then read the workbook if there is input.

     The workbook is read when ``filename`` or ``content`` is truthy;
     both arguments are handed to ``read`` unchanged.
     """
     self.streams = {}

     # Containers, one per record type, filled later by ``read``.
     self.sheets = RecordList(BoundSheet)
     self.fonts = RecordListRW(FontRecord)
     self.numberformats = RecordList(NumberFormat)
     self.extendedformats = RecordListRW(ExtendedFormat)
     self.staticstrings = StaticStrings()
     self.names = RecordList(NameRecord)
     self.supbooks = RecordList(SupBookRecord)

     has_input = filename or content
     if not has_input:
         return
     self.read(filename, content)
Example #3
0
class HSSFWorkbook(RecordContainer):
    """Workbook stream of an .xls file, held as a chain of records.

    ``read`` parses the stream into a singly linked list of records
    (``self.records`` is a dummy head, ``self.last_record`` the tail) and
    routes selected record types into dedicated containers (sheets, fonts,
    number formats, extended formats, static strings, names, supbooks).
    ``getdata`` serialises everything back into a single string and
    ``write`` stores it, together with the other streams, in a file.
    """

    MAX_ROW = 0xFFFF
    MAX_COLUMN = 0x00FF

    # The maximum number of cell styles in a .xls workbook.
    # The 'official' limit is 4,000, but POI allows a slightly larger number.
    # This extra delta takes into account built-in styles that are automatically
    # created for new workbooks
    #
    # See http://office.microsoft.com/en-us/excel-help/excel-specifications-and-limits-HP005199291.aspx
    MAX_STYLES = 4030

    def __init__(self, filename=None, content=NEWWORKBOOK):
        """Create the record containers and load the workbook if possible.

        filename -- path of the file to load; when given, the workbook
                    stream found in that file replaces ``content``.
        content  -- raw workbook stream used when no filename is given
                    (defaults to NEWWORKBOOK).
        """
        self.streams = {}
        self.sheets = RecordList(BoundSheet)
        self.fonts = RecordListRW(FontRecord)
        self.numberformats = RecordList(NumberFormat)
        self.extendedformats = RecordListRW(ExtendedFormat)
        self.staticstrings = StaticStrings()
        self.names = RecordList(NameRecord)
        self.supbooks = RecordList(SupBookRecord)
        if filename or content:
            self.read(filename, content)

    def write(self, filename):
        """Write all retained streams plus the workbook stream to ``filename``."""
        cfb = CFBWriter()
        for name in sorted(self.streams.iterkeys()):
            cfb.put(name, self.streams[name])
        # The workbook itself is always stored under the name 'Workbook'.
        cfb.put(('Workbook',), self.getdata())
        cfb.write(filename)

    def getdata(self):
        """Serialise the record chain and all sheets into one string.

        Side effects: ``self.staticstrings.strings`` is replaced by the
        strings collected in ``newstrings``, ``sheetdata`` is refreshed for
        every sheet that has a truthy ``sheet`` attribute, and each sheet's
        ``position_of_BOF`` is set to its offset in the returned data.
        """
        # NOTE(review): newstrings/newstring_map are reset here and are
        # presumably filled while the sheets serialise themselves -- confirm
        # against the sheet implementation.
        self.staticstrings.newstrings = []
        self.staticstrings.newstring_map = {}
        for sheet in self.sheets:
            if sheet.sheet:
                sheet.sheetdata = sheet.sheet.getdata()
        self.staticstrings.strings = self.staticstrings.newstrings
        del self.staticstrings.newstring_map

        # No record with id 0x0085 was read, so the sheet list is not part
        # of the chain yet: hook it onto the tail.
        if 0x0085 not in self.urecord:
            self.last_record.next = self.sheets

        result = []
        sheetpos = -1
        record = self.records.next
        while record:
            # Remember where the sheet list sits; it is re-emitted below.
            if sheetpos == -1 and record is self.sheets:
                sheetpos = len(result)
            result.append(record.get_data(self))
            record = record.next

        reslen = sum(len(chunk) for chunk in result)
        result.append(self.staticstrings.getdata(self, reslen))
        result.append(struct.pack('<HH', 0x000A, 0))  # EOF
        # Add the static strings just appended plus the 4-byte EOF record.
        reslen += len(result[-2]) + 4

        for sheet in self.sheets:
            sheet.position_of_BOF = reslen
            result.append(sheet.sheetdata)
            reslen += len(sheet.sheetdata)

        # The sheet list was emitted before the offsets above were known, so
        # emit it again with the final position_of_BOF values.
        # NOTE(review): this relies on the re-emitted data having the same
        # length as the first pass -- confirm.
        result[sheetpos] = self.sheets.get_data(self)
        return ''.join(result)

    def read(self, filename, content):
        """Parse a workbook stream into the record chain and containers.

        filename -- if truthy, the file is opened and its workbook stream
                    replaces ``content``; all remaining streams are kept in
                    ``self.streams``.
        content  -- raw workbook stream, used as is when no filename is given.

        Raises IOError when the file holds no workbook stream.
        """
        if filename:
            # Binary mode: text mode would corrupt the data on platforms
            # that translate line endings.
            # NOTE(review): the handle is intentionally not closed here; the
            # stream entries may still read from it lazily -- confirm before
            # adding a close().
            filehandle = open(filename, 'rb')
            cfb = CFBReader(filehandle)
            self.streams = cfb.dirtree
            del cfb

            # Normally, the Workbook will be in a POIFS Stream
            # called "Workbook". However, some XLS generators use "WORKBOOK"
            # Every matching entry is removed from the streams; the last
            # match in the tuple below wins.
            workbook = None
            for wb in ('Book', 'BOOK', 'WORKBOOK', 'Workbook'):
                if (wb,) in self.streams:
                    workbook = self.streams.pop((wb,))
            if not workbook:
                raise IOError('The file does not contain a Workbook-entry')
            content = workbook.data

        # Record id -> loader. Ids not listed here become plain Records.
        loaders = {
            0x0018: self.names.read,
            0x0031: self.fonts.read,
            0x0059: self.read_xct,
            0x005A: self.read_crn,
            0x0085: self.sheets.read,
            0x00e0: self.extendedformats.read,
            0x00fc: self.staticstrings.read,
            0x00ff: Record.ignore,
            0x01ae: self.supbooks.read,
            0x041E: self.numberformats.read,
        }

        urecord = {}  # first chained record seen for each record id
        self.records = Record(0, 0)  # dummy head of the chain
        last_record = self.records
        for sid, data in record_stream(content):
            if sid == 0x000A:  # EOF
                break
            new_record = loaders.get(sid, Record)(sid, data)
            # Loaders may return a falsy value; such records are not chained.
            if new_record:
                last_record.next = new_record
                last_record = new_record
                if sid not in urecord:
                    urecord[sid] = new_record
        self.urecord = urecord
        self.last_record = last_record

        # Walk the sheets from the highest offset down: each sheet's data
        # runs from its own BOF up to the start of the following sheet (or
        # the end of the content for the last one).
        pos = len(content)
        for sheet in sorted(self.sheets, key=lambda s: s.position_of_BOF,
                            reverse=True):
            sheet.sheetdata = content[sheet.position_of_BOF:pos]
            pos = sheet.position_of_BOF

        # Standard formats first, then the formats defined by this workbook.
        self.numberformats_map = dict(std_format_strings)
        self.numberformats_map.update(
            (nf.index, nf.format) for nf in self.numberformats)

    def read_xct(self, sid, data):
        """Loader for record 0x0059: wrap a sheet of the latest supbook.

        The wrapped sheet is remembered in ``self.supbooksheet`` so that
        following 0x005A records can be appended to it. Returns None, so
        the record is not added to the chain.
        """
        # Two fields are read at offset 4; the two padding bytes let a
        # record without the second field still unpack (as 0).
        cnt, itab = struct.unpack_from('<hH', data + '\0\0', 4)
        supbook = self.supbooks[-1]
        sheet = SupBookSheet(supbook.sheets[itab], cnt)
        supbook.sheets[itab] = sheet
        self.supbooksheet = sheet

    def read_crn(self, sid, data):
        """Loader for record 0x005A: append the data to the current supbook sheet.

        Relies on ``self.supbooksheet`` having been set by ``read_xct``.
        """
        self.supbooksheet.append(data)

    def add_extformat(self, xf):
        """Return ``xf.put_record(self)`` for an extended format.

        ``xf`` is either an object providing ``put_record`` or a key into
        ``self.extendedformats`` that is resolved first.
        """
        if not hasattr(xf, 'put_record'):
            xf = self.extendedformats[xf]
        return xf.put_record(self)
Example #4
0
File: excel.py Project: ruema/pypoi
class HSSFWorkbook(RecordContainer):
    """Workbook stream of an .xls file, held as a chain of records.

    ``read`` parses the stream into a singly linked list of records
    (``self.records`` is a dummy head, ``self.last_record`` the tail) and
    routes selected record types into dedicated containers (sheets, fonts,
    number formats, extended formats, static strings, names, supbooks).
    ``getdata`` serialises everything back into a single string and
    ``write`` stores it, together with the other streams, in a file.
    """

    MAX_ROW = 0xFFFF
    MAX_COLUMN = 0x00FF

    # The maximum number of cell styles in a .xls workbook.
    # The 'official' limit is 4,000, but POI allows a slightly larger number.
    # This extra delta takes into account built-in styles that are automatically
    # created for new workbooks
    #
    # See http://office.microsoft.com/en-us/excel-help/excel-specifications-and-limits-HP005199291.aspx
    MAX_STYLES = 4030

    def __init__(self, filename=None, content=NEWWORKBOOK):
        """Create the record containers and load the workbook if possible.

        filename -- path of the file to load; when given, the workbook
                    stream found in that file replaces ``content``.
        content  -- raw workbook stream used when no filename is given
                    (defaults to NEWWORKBOOK).
        """
        self.streams = {}
        self.sheets = RecordList(BoundSheet)
        self.fonts = RecordListRW(FontRecord)
        self.numberformats = RecordList(NumberFormat)
        self.extendedformats = RecordListRW(ExtendedFormat)
        self.staticstrings = StaticStrings()
        self.names = RecordList(NameRecord)
        self.supbooks = RecordList(SupBookRecord)
        if filename or content:
            self.read(filename, content)

    def write(self, filename):
        """Write all retained streams plus the workbook stream to ``filename``."""
        cfb = CFBWriter()
        for name in sorted(self.streams.iterkeys()):
            cfb.put(name, self.streams[name])
        # The workbook itself is always stored under the name 'Workbook'.
        cfb.put(('Workbook', ), self.getdata())
        cfb.write(filename)

    def getdata(self):
        """Serialise the record chain and all sheets into one string.

        Side effects: ``self.staticstrings.strings`` is replaced by the
        strings collected in ``newstrings``, ``sheetdata`` is refreshed for
        every sheet that has a truthy ``sheet`` attribute, and each sheet's
        ``position_of_BOF`` is set to its offset in the returned data.
        """
        # NOTE(review): newstrings/newstring_map are reset here and are
        # presumably filled while the sheets serialise themselves -- confirm
        # against the sheet implementation.
        self.staticstrings.newstrings = []
        self.staticstrings.newstring_map = {}
        for sheet in self.sheets:
            if sheet.sheet:
                sheet.sheetdata = sheet.sheet.getdata()
        self.staticstrings.strings = self.staticstrings.newstrings
        del self.staticstrings.newstring_map

        # No record with id 0x0085 was read, so the sheet list is not part
        # of the chain yet: hook it onto the tail.
        if 0x0085 not in self.urecord:
            self.last_record.next = self.sheets

        result = []
        sheetpos = -1
        record = self.records.next
        while record:
            # Remember where the sheet list sits; it is re-emitted below.
            if sheetpos == -1 and record is self.sheets:
                sheetpos = len(result)
            result.append(record.get_data(self))
            record = record.next

        reslen = sum(len(chunk) for chunk in result)
        result.append(self.staticstrings.getdata(self, reslen))
        result.append(struct.pack('<HH', 0x000A, 0))  # EOF
        # Add the static strings just appended plus the 4-byte EOF record.
        reslen += len(result[-2]) + 4

        for sheet in self.sheets:
            sheet.position_of_BOF = reslen
            result.append(sheet.sheetdata)
            reslen += len(sheet.sheetdata)

        # The sheet list was emitted before the offsets above were known, so
        # emit it again with the final position_of_BOF values.
        # NOTE(review): this relies on the re-emitted data having the same
        # length as the first pass -- confirm.
        result[sheetpos] = self.sheets.get_data(self)
        return ''.join(result)

    def read(self, filename, content):
        """Parse a workbook stream into the record chain and containers.

        filename -- if truthy, the file is opened and its workbook stream
                    replaces ``content``; all remaining streams are kept in
                    ``self.streams``.
        content  -- raw workbook stream, used as is when no filename is given.

        Raises IOError when the file holds no workbook stream.
        """
        if filename:
            # Binary mode: text mode would corrupt the data on platforms
            # that translate line endings.
            # NOTE(review): the handle is intentionally not closed here; the
            # stream entries may still read from it lazily -- confirm before
            # adding a close().
            filehandle = open(filename, 'rb')
            cfb = CFBReader(filehandle)
            self.streams = cfb.dirtree
            del cfb

            # Normally, the Workbook will be in a POIFS Stream
            # called "Workbook". However, some XLS generators use "WORKBOOK"
            # Every matching entry is removed from the streams; the last
            # match in the tuple below wins.
            workbook = None
            for wb in ('Book', 'BOOK', 'WORKBOOK', 'Workbook'):
                if (wb, ) in self.streams:
                    workbook = self.streams.pop((wb, ))
            if not workbook:
                raise IOError('The file does not contain a Workbook-entry')
            content = workbook.data

        # Record id -> loader. Ids not listed here become plain Records.
        loaders = {
            0x0018: self.names.read,
            0x0031: self.fonts.read,
            0x0059: self.read_xct,
            0x005A: self.read_crn,
            0x0085: self.sheets.read,
            0x00e0: self.extendedformats.read,
            0x00fc: self.staticstrings.read,
            0x00ff: Record.ignore,
            0x01ae: self.supbooks.read,
            0x041E: self.numberformats.read,
        }

        urecord = {}  # first chained record seen for each record id
        self.records = Record(0, 0)  # dummy head of the chain
        last_record = self.records
        for sid, data in record_stream(content):
            if sid == 0x000A:  # EOF
                break
            new_record = loaders.get(sid, Record)(sid, data)
            # Loaders may return a falsy value; such records are not chained.
            if new_record:
                last_record.next = new_record
                last_record = new_record
                if sid not in urecord:
                    urecord[sid] = new_record
        self.urecord = urecord
        self.last_record = last_record

        # Walk the sheets from the highest offset down: each sheet's data
        # runs from its own BOF up to the start of the following sheet (or
        # the end of the content for the last one).
        pos = len(content)
        for sheet in sorted(self.sheets,
                            key=lambda s: s.position_of_BOF,
                            reverse=True):
            sheet.sheetdata = content[sheet.position_of_BOF:pos]
            pos = sheet.position_of_BOF

        # Standard formats first, then the formats defined by this workbook.
        self.numberformats_map = dict(std_format_strings)
        self.numberformats_map.update(
            (nf.index, nf.format) for nf in self.numberformats)

    def read_xct(self, sid, data):
        """Loader for record 0x0059: wrap a sheet of the latest supbook.

        The wrapped sheet is remembered in ``self.supbooksheet`` so that
        following 0x005A records can be appended to it. Returns None, so
        the record is not added to the chain.
        """
        # Two fields are read at offset 4; the two padding bytes let a
        # record without the second field still unpack (as 0).
        cnt, itab = struct.unpack_from('<hH', data + '\0\0', 4)
        supbook = self.supbooks[-1]
        sheet = SupBookSheet(supbook.sheets[itab], cnt)
        supbook.sheets[itab] = sheet
        self.supbooksheet = sheet

    def read_crn(self, sid, data):
        """Loader for record 0x005A: append the data to the current supbook sheet.

        Relies on ``self.supbooksheet`` having been set by ``read_xct``.
        """
        self.supbooksheet.append(data)

    def add_extformat(self, xf):
        """Return ``xf.put_record(self)`` for an extended format.

        ``xf`` is either an object providing ``put_record`` or a key into
        ``self.extendedformats`` that is resolved first.
        """
        if not hasattr(xf, 'put_record'):
            xf = self.extendedformats[xf]
        return xf.put_record(self)