Ejemplo n.º 1
0
def read_properties_core(xml_source):
    """Read the core document properties out of an XML string.

    :param xml_source: XML text, parsed with ``fromstring``.
    :return: a new ``DocumentProperties`` whose ``creator``,
        ``last_modified_by``, ``created`` and ``modified`` attributes
        have been filled in.

    Fallbacks when an element is not found: ``''`` for ``creator`` and
    ``last_modified_by``, the current local time for ``created``, and the
    value just stored in ``created`` for ``modified``.
    """
    properties = DocumentProperties()
    root = fromstring(xml_source)

    # findtext() returns the supplied default when the element is absent and
    # an empty string when the element exists but has no text, so both
    # attributes are always strings (a plain ``node.text`` would give None
    # for an empty element such as <dc:creator/>).
    properties.creator = root.findtext(
        QName(NAMESPACES['dc'], 'creator').text, '')
    properties.last_modified_by = root.findtext(
        QName(NAMESPACES['cp'], 'lastModifiedBy').text, '')

    created_node = root.find(QName(NAMESPACES['dcterms'], 'created').text)
    if created_node is not None:
        properties.created = W3CDTF_to_datetime(created_node.text)
    else:
        properties.created = datetime.datetime.now()

    modified_node = root.find(QName(NAMESPACES['dcterms'], 'modified').text)
    if modified_node is not None:
        properties.modified = W3CDTF_to_datetime(modified_node.text)
    else:
        # No modification stamp: reuse whatever was stored as creation time.
        properties.modified = properties.created

    return properties
Ejemplo n.º 2
0
def read_properties_core(xml_source):
    """Build a ``DocumentProperties`` from core-properties XML.

    :param xml_source: XML text, parsed with ``fromstring``.
    :return: the populated ``DocumentProperties`` instance.

    ``creator`` and ``last_modified_by`` default to ``''``; ``created``
    defaults to the current local time and ``modified`` defaults to the
    value stored in ``created``.
    """
    properties = DocumentProperties()
    root = fromstring(xml_source)

    creator_tag = QName(NAMESPACES['dc'], 'creator').text
    last_modified_by_tag = QName(NAMESPACES['cp'], 'lastModifiedBy').text

    # Using findtext() with a '' default covers two cases at once: a missing
    # element (default returned) and an element without text content (empty
    # string returned). Reading ``node.text`` directly would store None in
    # the second case.
    properties.creator = root.findtext(creator_tag, '')
    properties.last_modified_by = root.findtext(last_modified_by_tag, '')

    created_node = root.find(QName(NAMESPACES['dcterms'], 'created').text)
    if created_node is not None:
        properties.created = W3CDTF_to_datetime(created_node.text)
    else:
        properties.created = datetime.datetime.now()

    modified_node = root.find(QName(NAMESPACES['dcterms'], 'modified').text)
    if modified_node is not None:
        properties.modified = W3CDTF_to_datetime(modified_node.text)
    else:
        # Fall back to the creation time when no modification time is given.
        properties.modified = properties.created

    return properties
Ejemplo n.º 3
0
def test_write_properties_core(datadir):
    """Serialise fixed core properties and compare them with ``core.xml``.

    The working directory is switched to the ``writer`` data directory so
    that the reference file can be opened by its bare name.
    """
    datadir.join("writer").chdir()

    prop = DocumentProperties()
    prop.creator = 'TEST_USER'
    prop.last_modified_by = 'SOMEBODY'
    prop.created = datetime(2010, 4, 1, 20, 30, 0)
    prop.modified = datetime(2010, 4, 5, 14, 5, 30)

    content = write_properties_core(prop)

    with open('core.xml') as reference:
        expected_xml = reference.read()

    # compare_xml() is expected to return None when both documents match.
    diff = compare_xml(content, expected_xml)
    assert diff is None
Ejemplo n.º 4
0
def test_write_properties_core(datadir):
    """Check ``write_properties_core`` output against the stored ``core.xml``."""
    datadir.join("writer").chdir()

    # Attribute/value pairs applied to the properties object, in order.
    fixture_values = (
        ('creator', 'TEST_USER'),
        ('last_modified_by', 'SOMEBODY'),
        ('created', datetime(2010, 4, 1, 20, 30, 0)),
        ('modified', datetime(2010, 4, 5, 14, 5, 30)),
    )
    prop = DocumentProperties()
    for attribute, value in fixture_values:
        setattr(prop, attribute, value)

    content = write_properties_core(prop)
    with open('core.xml') as expected:
        expected_xml = expected.read()

    assert compare_xml(content, expected_xml) is None
Ejemplo n.º 5
0
def read_properties_core(xml_source):
    """Parse core-properties XML into a ``DocumentProperties`` object.

    :param xml_source: XML text, parsed with ``fromstring``.
    :return: the populated ``DocumentProperties``.

    Text fields default to ``''``; ``created`` defaults to the current local
    time and ``modified`` defaults to the stored ``created`` value.
    """
    properties = DocumentProperties()
    tree = fromstring(xml_source)

    properties.creator = tree.findtext('{%s}creator' % DCORE_NS, '')
    properties.last_modified_by = tree.findtext(
        '{%s}lastModifiedBy' % COREPROPS_NS, '')

    created = tree.find('{%s}created' % DCTERMS_NS)
    properties.created = (
        W3CDTF_to_datetime(created.text)
        if created is not None
        else datetime.datetime.now()
    )

    modified = tree.find('{%s}modified' % DCTERMS_NS)
    properties.modified = (
        W3CDTF_to_datetime(modified.text)
        if modified is not None
        else properties.created
    )

    return properties
Ejemplo n.º 6
0
def _load_workbook(wb, archive, filename, use_iterators):
    """Populate ``wb`` from the parts stored in ``archive``.

    :param wb: workbook object to fill in; its ``properties``,
        ``loaded_theme``, ``worksheets`` and ``_named_ranges`` are set here.
    :param archive: opened archive exposing ``namelist()`` and ``read()``.
    :param filename: forwarded to ``read_worksheet`` when ``use_iterators``
        is true; unused otherwise.
    :param use_iterators: when true, worksheets are read through
        ``unpack_worksheet`` instead of being read from the archive directly.
    :raises KeyError: propagated from ``archive.read`` for any part other
        than the core properties and the shared strings, which fall back to
        defaults.
    """
    # Only used for membership tests, so a set gives constant-time lookups.
    valid_files = set(archive.namelist())

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    wb.loaded_theme = archive.read(ARC_THEME)
    style_table = read_style_table(archive.read(ARC_STYLE))

    # The workbook part feeds three readers below; extract it from the
    # archive once instead of decompressing it for every call.
    workbook_xml = archive.read(ARC_WORKBOOK)

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=workbook_xml)

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    sheet_names = read_sheets_titles(workbook_xml)
    for i, sheet_name in enumerate(sheet_names):

        # The part name is derived from the position of the title.
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        # Titles without a matching part in the archive are skipped.
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, string_table, style_table)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table,
                                    style_table, filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(workbook_xml, wb)
Ejemplo n.º 7
0
def test_read_workbook_with_no_core_properties(datadir):
    """Loading a file without core properties must yield default properties."""
    from openpyxl.workbook import DocumentProperties
    from openpyxl.reader.excel import _load_workbook

    datadir.join('genuine').chdir()
    wb = Workbook()
    default_props = DocumentProperties()
    # The context manager closes the archive once loading is done, so the
    # file handle is released even if loading raises.
    with ZipFile('empty_with_no_properties.xlsx') as archive:
        _load_workbook(wb, archive, None, False, False)
    prop = wb.properties
    assert prop.creator == default_props.creator
    assert prop.last_modified_by == default_props.last_modified_by
    assert prop.title == default_props.title
    assert prop.subject == default_props.subject
    assert prop.description == default_props.description
    assert prop.category == default_props.category
    assert prop.keywords == default_props.keywords
    assert prop.company == default_props.company
    # Compare the calendar date only (year, month, day): the two objects are
    # presumably stamped at slightly different instants, so comparing down
    # to the second would fail whenever a second boundary falls in between.
    # Might still break if the test runs on the stroke of midnight.
    assert (prop.created.timetuple()[:3] ==
            default_props.created.timetuple()[:3])
    assert prop.modified == prop.created
Ejemplo n.º 8
0
def read_properties_core(xml_source):
    """Extract creator, last editor and timestamps from core-properties XML.

    :param xml_source: XML text, parsed with ``fromstring``.
    :return: a ``DocumentProperties`` carrying the values that were read.
    """
    properties = DocumentProperties()
    document = fromstring(xml_source)

    creator_tag = '{%s}creator' % DCORE_NS
    properties.creator = document.findtext(creator_tag, '')
    editor_tag = '{%s}lastModifiedBy' % COREPROPS_NS
    properties.last_modified_by = document.findtext(editor_tag, '')

    # Creation time: parsed from the document, or "now" when not recorded.
    created_element = document.find('{%s}created' % DCTERMS_NS)
    if created_element is None:
        properties.created = datetime.datetime.now()
    else:
        properties.created = W3CDTF_to_datetime(created_element.text)

    # Modification time: parsed from the document, or the creation time.
    modified_element = document.find('{%s}modified' % DCTERMS_NS)
    if modified_element is None:
        properties.modified = properties.created
    else:
        properties.modified = W3CDTF_to_datetime(modified_element.text)

    return properties
Ejemplo n.º 9
0
 def setup_class(cls):
     """Prepare class-level fixtures.

     Runs ``make_tmpdir()``, then stores the path of ``test.xlsx`` inside
     ``TMPDIR`` and a fresh ``DocumentProperties`` on the class.
     """
     make_tmpdir()
     workbook_path = os.path.join(TMPDIR, 'test.xlsx')
     cls.tmp_filename = workbook_path
     fresh_properties = DocumentProperties()
     cls.prop = fresh_properties
Ejemplo n.º 10
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate ``wb`` from the parts stored in ``archive``.

    :param wb: workbook object to fill in.
    :param archive: opened archive exposing ``namelist()`` and ``read()``.
    :param filename: path or file-like object the archive was opened from;
        re-read when ``keep_vba`` is set and re-opened when
        ``use_iterators`` is set.
    :param use_iterators: when true, worksheets are created without their
        XML being read here and comments are not loaded.
    :param keep_vba: when true, a private in-memory copy of the whole file
        is attached to ``wb.vba_archive``.
    :raises KeyError: propagated from ``archive.read`` for parts that have
        no fallback (workbook, styles, worksheets).
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``with`` guarantees the handle is closed even if read() fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except TypeError:
            # open() rejects non-path arguments with TypeError: treat
            # ``filename`` as an already opened file-like object and restore
            # its position afterwards.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    # The workbook part is needed several times below; extract it once.
    workbook_xml = archive.read(ARC_WORKBOOK)

    # Kept outside the ``try`` above so that the settings are still read
    # when the archive has no core properties part.
    wb.read_workbook_settings(workbook_xml)

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    # The style table itself is passed to the worksheet reader; everything
    # else stays on the workbook.
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=workbook_xml)

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = '%s/%s' % (PACKAGE_XL, sheet['path'])
        # Sheets whose part is not present in the archive are skipped.
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path),
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None,
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                worksheet_path=worksheet_path)
        wb.add_sheet(new_ws)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(workbook_xml, wb)
Ejemplo n.º 11
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate ``wb`` from the parts stored in ``archive``.

    :param wb: workbook object to fill in.
    :param archive: opened archive exposing ``namelist()`` and ``read()``.
    :param filename: source the archive was opened from; re-opened as
        ``wb._archive`` when ``use_iterators`` is set.
    :param use_iterators: when true, worksheets are created without their
        XML being read here and comments are not loaded.
    :param keep_vba: when true, ``archive`` itself is attached to
        ``wb.vba_archive``.
    :raises KeyError: propagated from ``archive.read`` for parts that have
        no fallback (workbook, styles, content types, worksheets).
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach the archive to the
    # workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    # The workbook part is needed several times below; extract it once.
    workbook_xml = archive.read(ARC_WORKBOOK)

    # Kept outside the ``try`` above so that the settings are still read
    # when the archive has no core properties part.
    wb.read_workbook_settings(workbook_xml)

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    # The style table itself is passed to the worksheet reader; everything
    # else stays on the workbook.
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=workbook_xml)

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    # Keep only the parts declared as worksheets or chartsheets ...
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types
                   if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(workbook_xml)
    # ... pair them positionally with the titles and drop everything that is
    # not a plain worksheet.
    worksheet_names = [
        worksheet for worksheet, sheet_type in zip(sheet_names, sheet_types)
        if sheet_type[1] == VALID_WORKSHEET
    ]
    for i, sheet_name in enumerate(worksheet_names):

        # The part name is derived from the position of the title.
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        # Titles without a matching part in the archive are skipped.
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path),
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None,
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                sheet_codename=sheet_codename)
        wb.add_sheet(new_ws, index=i)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(sheet_codename, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(workbook_xml, wb)
Ejemplo n.º 12
0
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Populate ``wb`` from the parts stored in ``archive``.

    :param wb: workbook object to fill in.
    :param archive: opened archive exposing ``namelist()`` and ``read()``.
    :param filename: path or file-like object the archive was opened from;
        re-read when ``keep_vba`` is set and re-opened when ``read_only``
        is set.
    :param read_only: when true, worksheets are created without their XML
        being read here and comments are not loaded.
    :param keep_vba: when true, a private in-memory copy of the whole file
        is attached to ``wb.vba_archive``.
    :raises KeyError: propagated from ``archive.read`` for parts that have
        no fallback (workbook, styles, worksheets).
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``with`` guarantees the handle is closed even if read() fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except TypeError:
            # open() rejects non-path arguments with TypeError: treat
            # ``filename`` as an already opened file-like object and restore
            # its position afterwards.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    # The workbook part is needed several times below; extract it once.
    workbook_xml = archive.read(ARC_WORKBOOK)
    wb._read_workbook_settings(workbook_xml)

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Archive member names carry no leading slash.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(
        archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=workbook_xml)

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        # Sheets whose part is not present in the archive are skipped.
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path),
                                    wb,
                                    sheet_name,
                                    shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None,
                                    wb,
                                    sheet_name,
                                    shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state is treated as 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(workbook_xml, wb))

    wb.code_name = read_workbook_code_name(workbook_xml)

    # Relationships are only needed to resolve external links.
    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))