Beispiel #1
0
def iter_rows(workbook_name, sheet_name, xml_source, range_string='', row_offset=0, column_offset=0):

    archive = get_archive_file(workbook_name)

    source = xml_source

    if range_string:
        min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)
    else:
        min_col, min_row, max_col, max_row = read_dimension(xml_source=source)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col) + 1
        max_row += 6

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    style_table = read_style_table(archive.read(ARC_STYLE))

    source.seek(0)
    p = iterparse(source)

    return get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table)
def test_parse_dxfs(datadir):
    datadir.chdir()
    reference_file = 'conditional-formatting.xlsx'
    wb = load_workbook(reference_file)
    assert isinstance(wb, Workbook)
    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    read_xml = archive.read(ARC_STYLE)

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = 'dxf_style.xml'
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font'].color == Color('FF9C0006')
    assert not cond_styles['font'].bold
    assert not cond_styles['font'].italic
    f = PatternFill(end_color=Color('FFFFC7CE'))
    assert cond_styles['fill'] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = get_xml(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop['dxf_list']) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop['dxf_list']):
        assert repr(wb.style_properties['dxf_list'][i] == dxf)
Beispiel #3
0
def _load_workbook(wb, archive, filename, use_iterators):

    valid_files = archive.namelist()

    # get workbook-level information
    wb.properties = read_properties_core(archive.read(ARC_CORE))
    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    style_table = read_style_table(archive.read(ARC_STYLE))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    for i, sheet_name in enumerate(sheet_names):

        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, string_table, style_table)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table,
                                    style_table, filename, sheet_codename)
            #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
Beispiel #4
0
def test_read_cell_style():
    reference_file = os.path.join(
            DATADIR, 'reader', 'empty-workbook-styles.xml')
    with open(reference_file, 'r') as handle:
        content = handle.read()
    style_table = read_style_table(content)
    eq_(2, len(style_table))
def test_parse_dxfs(datadir):
    datadir.chdir()
    reference_file = 'conditional-formatting.xlsx'
    wb = load_workbook(reference_file)
    assert isinstance(wb, Workbook)
    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    read_xml = archive.read(ARC_STYLE)

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = 'dxf_style.xml'
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font'].color == Color('FF9C0006')
    assert not cond_styles['font'].bold
    assert not cond_styles['font'].italic
    f = PatternFill(end_color=Color('FFFFC7CE'))
    assert cond_styles['fill'] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = tostring(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop[2]) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop[2]):
        assert repr(wb.style_properties['dxf_list'][i] == dxf)
def test_parse_dxfs():
    reference_file = os.path.join(DATADIR, 'reader',
                                  'conditional-formatting.xlsx')
    wb = load_workbook(reference_file)
    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    read_xml = archive.read(ARC_STYLE)

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = os.path.join(DATADIR, 'writer', 'expected',
                                  'dxf_style.xml')
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font']['color'] == Color('FF9C0006')
    assert cond_styles['font']['bold'] == False
    assert cond_styles['font']['italic'] == False
    f = Fill()
    f.end_color = Color('FFFFC7CE')
    assert cond_styles['fill'][0] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = get_xml(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop['dxf_list']) == len(
        wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop['dxf_list']):
        assert repr(wb.style_properties['dxf_list'][i] == dxf)
Beispiel #7
0
def test_read_cell_style():
    reference_file = os.path.join(DATADIR, 'reader',
                                  'empty-workbook-styles.xml')
    with open(reference_file, 'r') as handle:
        content = handle.read()
    style_table = read_style_table(content)
    eq_(2, len(style_table))
Beispiel #8
0
def test_read_simple_style_mappings(datadir):
    datadir.chdir()
    with open("simple-styles.xml") as content:
        style_properties = read_style_table(content.read())[0]
        assert len(style_properties) == 4
        assert numbers.BUILTIN_FORMATS[9] == style_properties[1].number_format
        assert 'yyyy-mm-dd' == style_properties[2].number_format
Beispiel #9
0
def test_read_simple_style_mappings(datadir):
    datadir.chdir()
    with open("simple-styles.xml") as content:
        style_properties = read_style_table(content.read())[0]
        assert len(style_properties) == 4
        assert numbers.BUILTIN_FORMATS[9] == style_properties[1].number_format
        assert 'yyyy-mm-dd' == style_properties[2].number_format
Beispiel #10
0
def iter_rows(workbook_name,
              sheet_name,
              xml_source,
              shared_date,
              string_table,
              range_string='',
              row_offset=0,
              column_offset=0):

    archive = get_archive_file(workbook_name)

    source = xml_source

    if range_string:
        min_col, min_row, max_col, max_row = get_range_boundaries(
            range_string, row_offset, column_offset)
    else:
        min_col, min_row, max_col, max_row = read_dimension(xml_source=source)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col) + 1
        max_row += 6

    style_table = read_style_table(archive.read(ARC_STYLE))

    source.seek(0)
    p = iterparse(source)

    return get_squared_range(p, min_col, min_row, max_col, max_row,
                             string_table, style_table, shared_date)
def test_parse_dxfs():
    reference_file = os.path.join(DATADIR, 'reader', 'conditional-formatting.xlsx')
    wb = load_workbook(reference_file)
    archive = ZipFile(reference_file, 'r', ZIP_DEFLATED)
    read_xml = archive.read(ARC_STYLE)

    # Verify length
    assert '<dxfs count="164">' in str(read_xml)
    assert len(wb.style_properties['dxf_list']) == 164

    # Verify first dxf style
    reference_file = os.path.join(DATADIR, 'writer', 'expected', 'dxf_style.xml')
    with open(reference_file) as expected:
        diff = compare_xml(read_xml, expected.read())
        assert diff is None, diff

    cond_styles = wb.style_properties['dxf_list'][0]
    assert cond_styles['font']['color'] == Color('FF9C0006')
    assert cond_styles['font']['bold'] == False
    assert cond_styles['font']['italic'] == False
    f = Fill()
    f.end_color = Color('FFFFC7CE')
    assert cond_styles['fill'][0] == f

    # Verify that the dxf styles stay the same when they're written and read back in.
    w = StyleWriter(wb)
    w._write_dxfs()
    write_xml = get_xml(w._root)
    read_style_prop = read_style_table(write_xml)
    assert len(read_style_prop['dxf_list']) == len(wb.style_properties['dxf_list'])
    for i, dxf in enumerate(read_style_prop['dxf_list']):
        assert repr(wb.style_properties['dxf_list'][i] == dxf)
Beispiel #12
0
def _load_workbook(wb, archive, filename, use_iterators):

    valid_files = archive.namelist()

    # get workbook-level information
    wb.properties = read_properties_core(archive.read(ARC_CORE))
    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    style_table = read_style_table(archive.read(ARC_STYLE))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    for i, sheet_name in enumerate(sheet_names):

        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename)
            #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename)
        wb.add_sheet(new_ws, index = i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
Beispiel #13
0
def test_read_complex_style_mappings(datadir):
    datadir.chdir()
    with open("complex-styles.xml") as content:
        style_properties = read_style_table(content.read())
        assert style_properties['table'] == {
            0: 0,
            1: 1,
            2: 2,
            3: 3,
            4: 4,
            5: 5,
            6: 6,
            7: 7,
            8: 8,
            9: 9,
            10: 10,
            11: 11,
            12: 12,
            13: 0,
            14: 13,
            15: 14,
            16: 15,
            17: 10,
            18: 16,
            19: 17,
            20: 18,
            21: 19,
            22: 0,
            23: 10,
            24: 20,
            25: 21,
            26: 22,
            27: 23,
            28: 24
        }
Beispiel #14
0
def test_read_style():
    reference_file = os.path.join(DATADIR, 'reader', 'simple-styles.xml')
    with open(reference_file, 'r') as handle:
        content = handle.read()
    style_table = read_style_table(content)
    eq_(4, len(style_table))
    eq_(NumberFormat._BUILTIN_FORMATS[9],
        style_table[1].number_format.format_code)
    eq_('yyyy-mm-dd', style_table[2].number_format.format_code)
Beispiel #15
0
def test_read_style():
    reference_file = os.path.join(DATADIR, 'reader', 'simple-styles.xml')
    with open(reference_file, 'r') as handle:
        content = handle.read()
    style_table = read_style_table(content)
    eq_(4, len(style_table))
    eq_(NumberFormat._BUILTIN_FORMATS[9],
            style_table[1].number_format.format_code)
    eq_('yyyy-mm-dd', style_table[2].number_format.format_code)
Beispiel #16
0
def test_read_simple_style_mappings(datadir):
    datadir.chdir()
    with open("simple-styles.xml") as content:
        style_properties = read_style_table(content.read())
        style_table = style_properties['table']
        style_list = style_properties['list']
        assert len(style_table) == 4
        assert NumberFormat._BUILTIN_FORMATS[9] == style_list[style_table[1]].number_format
        assert 'yyyy-mm-dd' == style_list[style_table[2]].number_format
Beispiel #17
0
def test_read_complex_style_mappings(datadir):
    datadir.chdir()
    with open("complex-styles.xml") as content:
        style_properties = read_style_table(content.read())
        assert style_properties['table'] == {0:0, 1:1, 2:2, 3:3, 4:4, 5:5,
                                             6:6, 7:7, 8:8, 9:9, 10:10, 11:11,
                                             12:12, 13:0, 14:13, 15:14, 16:15,
                                             17:10, 18:16, 19:17, 20:18, 21:19,
                                             22:0, 23:10, 24:20, 25:21, 26:22,
                                             27: 23, 28: 24}
Beispiel #18
0
def test_read_cell_style():
    reference_file = os.path.join(DATADIR, 'reader', 'empty-workbook-styles.xml')
    handle = open(reference_file, 'r')
    try:
        content = handle.read()
    finally:
        handle.close()
    style_properties = read_style_table(content)
    style_table = style_properties['table']
    assert len(style_table) == 2
Beispiel #19
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    worksheet_names = [worksheet for worksheet, sheet_type in zip(sheet_names, sheet_types) if sheet_type[1] == VALID_WORKSHEET]
    for i, sheet_name in enumerate(worksheet_names):

        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, style_properties['color_index'], keep_vba=keep_vba)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, style_properties['color_index'], filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
Beispiel #20
0
def test_read_style():
    reference_file = os.path.join(DATADIR, 'reader', 'simple-styles.xml')

    handle = open(reference_file, 'r')
    try:
        content = handle.read()
    finally:
        handle.close()
    style_properties = read_style_table(content)
    style_table = style_properties['table']
    assert len(style_table) == 4
    assert NumberFormat._BUILTIN_FORMATS[9] == style_table[1].number_format
    assert 'yyyy-mm-dd' == style_table[2].number_format
Beispiel #21
0
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
        current_row = min_row

        style_properties = read_style_table(self.archive.read(ARC_STYLE))
        style_table = style_properties.pop('table')

        for row, cells in groupby(self.get_cells(min_row, min_col,
                                                 max_row, max_col),
                                  operator.attrgetter('row')):
            full_row = []
            if current_row < row:

                for gap_row in xrange(current_row, row):
                    dummy_cells = get_missing_cells(gap_row, expected_columns)
                    yield tuple([dummy_cells[column] for column in expected_columns])
                    current_row = row

            temp_cells = list(cells)
            retrieved_columns = dict([(c.column, c) for c in temp_cells])
            missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
            replacement_columns = get_missing_cells(row, missing_columns)

            for column in expected_columns:
                if column in retrieved_columns:
                    cell = retrieved_columns[column]
                    if cell.style_id is not None:
                        style = style_table[int(cell.style_id)]
                        cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                    if cell.internal_value is not None:
                        if cell.data_type in Cell.TYPE_STRING:
                            cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                        elif cell.data_type == Cell.TYPE_BOOL:
                            cell = cell._replace(internal_value=cell.internal_value == '1')
                        elif cell.is_date:
                            cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                        elif cell.data_type == Cell.TYPE_NUMERIC:
                            cell = cell._replace(internal_value=float(cell.internal_value))
                        elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                            cell = cell._replace(internal_value=unicode(cell.internal_value))
                    full_row.append(cell)
                else:
                    full_row.append(replacement_columns[column])
            current_row = row + 1
            yield tuple(full_row)
Beispiel #22
0
def _load_workbook(wb, archive, filename, use_iterators):

    valid_files = archive.namelist()

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_table = read_style_table(archive.read(ARC_STYLE))

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    for i, sheet_name in enumerate(sheet_names):

        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table, range_string='', row_offset=0, column_offset=0):

    archive = get_archive_file(workbook_name)

    source = xml_source

    if range_string:
        min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)
    else:
        min_col, min_row, max_col, max_row = read_dimension(xml_source=source)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col) + 1
        max_row += 6

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')

    source.seek(0)
    p = iterparse(source)

    return get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table, shared_date)
Beispiel #24
0
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except (BadZipfile, RuntimeError, IOError, ValueError), e:
        raise InvalidFileException(unicode(e))
    wb = Workbook()
    wb._set_optimized_read()
    try:
        # get workbook-level information
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        try:
            string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
        except KeyError:
            string_table = {}
        style_table = read_style_table(archive.read(ARC_STYLE))

        # get worksheets
        wb.worksheets = []  # remove preset worksheet
        sheet_names = read_sheets_titles(archive.read(ARC_APP))
        for i, sheet_name in enumerate(sheet_names):
            sheet_codename = 'sheet%d.xml' % (i + 1)
            worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

            if not use_iterators:
                new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table)
            else:
                xml_source = unpack_worksheet(archive, worksheet_path)
                new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename)
                #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename)
            wb.add_sheet(new_ws, index = i)
Beispiel #25
0
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=False, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only or use_iterators

    wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb.active = read_workbook_settings(archive.read(ARC_WORKBOOK)) or 0

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    wb.is_template = XLTX in cts or XLTM in cts

    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    parsed_styles = read_style_table(archive)
    if parsed_styles is not None:
        wb._differential_styles = parsed_styles.differential_styles
        wb._cell_styles = parsed_styles.cell_styles
        wb._named_styles = parsed_styles.named_styles
        wb._colors = parsed_styles.color_index
        wb._borders = parsed_styles.border_list
        wb._fonts = parsed_styles.font_list
        wb._fills = parsed_styles.fill_list
        wb._number_formats = parsed_styles.number_formats
        wb._protections = parsed_styles.protections
        wb._alignments = parsed_styles.alignments
        wb._colors = parsed_styles.color_index

    wb.excel_base_date = read_excel_base_date(archive)

    # get worksheets
    wb._sheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if read_only:
            new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
            wb._add_sheet(new_ws)
        else:
            fh = archive.open(worksheet_path)
            parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
            parser.parse()
            new_ws = wb[sheet_name]
        new_ws.sheet_state = sheet['state']

        if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
            # We need to get the file name of the legacy drawing
            dirname, basename = worksheet_path.rsplit('/', 1)
            rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
            rels = get_dependents(archive, rels_path)
            new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

        if not read_only:
        # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))
            drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
            if drawings_file is not None:
                read_drawings(new_ws, drawings_file, archive, valid_files)
    wb._differential_styles = [] # reset
    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))


    archive.close()
    return wb
Beispiel #26
0
def _load_workbook(wb, archive, filename, read_only, keep_vba):

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))
    rels = dict

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list':cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
        # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
Beispiel #27
0
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))
    rels = dict

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(
        archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path),
                                    wb,
                                    sheet_name,
                                    shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None,
                                    wb,
                                    sheet_name,
                                    shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

            drawings_file = get_drawings_file(worksheet_path, archive,
                                              valid_files)
            if drawings_file is not None:
                read_drawings(new_ws, drawings_file, archive, valid_files)

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
Beispiel #28
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = '%s/%s' % (PACKAGE_XL, sheet['path'])
        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path),
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None,
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                worksheet_path=worksheet_path)
        wb.add_sheet(new_ws)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
Beispiel #29
0
def test_read_complex_style_mappings(datadir):
    datadir.chdir()
    with open("complex-styles.xml") as content:
        style_properties = read_style_table(content.read())[0]
        assert len(style_properties) == 29
        assert style_properties[-1].font.bold is False
Beispiel #30
0
def test_read_cell_style(datadir):
    datadir.chdir()
    with open("empty-workbook-styles.xml") as content:
        style_properties = read_style_table(content.read())
        assert len(style_properties) == 3
Beispiel #31
0
def test_read_complex_style_mappings(datadir):
    datadir.chdir()
    with open("complex-styles.xml") as content:
        style_properties = read_style_table(content.read())[0]
        assert len(style_properties) == 29
        assert style_properties[-1].font.bold is False
Beispiel #32
0
def test_read_cell_style(datadir):
    datadir.chdir()
    with open("empty-workbook-styles.xml") as content:
        style_properties = read_style_table(content.read())
        assert len(style_properties) == 3
Beispiel #33
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        shared_strings = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.shared_styles = style_properties.pop('list')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=style_properties['color_index'],
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table,
                                    color_index=style_properties['color_index'],
                                    worksheet_path=worksheet_path)
        wb.add_sheet(new_ws)

        if not use_iterators:
        # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))
Beispiel #34
0
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except (BadZipfile, RuntimeError, IOError, ValueError), e:
        raise InvalidFileException(unicode(e))
    wb = Workbook()
    wb._set_optimized_read()
    try:
        # get workbook-level information
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        try:
            string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
        except KeyError:
            string_table = {}
        style_table = read_style_table(archive.read(ARC_STYLE))

        # get worksheets
        wb.worksheets = []  # remove preset worksheet
        sheet_names = read_sheets_titles(archive.read(ARC_APP))
        for i, sheet_name in enumerate(sheet_names):
            sheet_codename = 'sheet%d.xml' % (i + 1)
            worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

            if not use_iterators:
                new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                        sheet_name, string_table, style_table)
            else:
                xml_source = unpack_worksheet(archive, worksheet_path)
                new_ws = read_worksheet(xml_source, wb, sheet_name,
                                        string_table, style_table, filename,
Beispiel #35
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types
                   if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    worksheet_names = [
        worksheet for worksheet, sheet_type in zip(sheet_names, sheet_types)
        if sheet_type[1] == VALID_WORKSHEET
    ]
    for i, sheet_name in enumerate(worksheet_names):

        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if not worksheet_path in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path),
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None,
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                sheet_codename=sheet_codename)
        wb.add_sheet(new_ws, index=i)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(sheet_codename, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)