Example #1
0
def dataframe_to_rows(df, index=True, header=True):
    """
    Convert a Pandas dataframe into something suitable for passing into a worksheet.

    This is a generator that yields one list per worksheet row.

    If header is True the column headers are yielded first (one row per
    column level); when index is also True each header row starts with one
    empty cell per index level.
    If index is True a row holding the index names is yielded next and every
    data row is prefixed with its index value(s).
    Formatting should be done by client code.
    """
    import numpy
    from pandas import Timestamp

    # ``_data`` is only a deprecated alias of ``_mgr`` in recent pandas
    # releases, so prefer ``_mgr`` and keep ``_data`` as a fallback.
    manager = getattr(df, "_mgr", None)
    if manager is None:
        manager = df._data
    blocks = manager.blocks
    ncols = sum(b.shape[0] for b in blocks)
    data = [None] * ncols

    for b in blocks:
        values = b.values

        if b.dtype.type == numpy.datetime64:
            # hand out pandas Timestamps instead of raw datetime64 values
            values = numpy.array([Timestamp(v) for v in values.ravel()])
            values = values.reshape(b.shape)

        result = values.tolist()

        # store every row of the block at the column position given by mgr_locs
        for col_loc, col in zip(b.mgr_locs, result):
            data[col_loc] = col

    if header:
        if df.columns.nlevels > 1:
            rows = expand_levels(df.columns.levels)
        else:
            rows = [list(df.columns.values)]
        for row in rows:
            row = [
                Timestamp(v) if isinstance(v, numpy.datetime64) else v
                for v in row
            ]
            if index:
                row = [None] * df.index.nlevels + row
            yield row

    cols = None
    if index and df.index.nlevels > 1:
        # the expanded index is only consumed when index cells are emitted
        cols = zip(*expand_levels(df.index.levels))

    if index:
        yield df.index.names

    for idx, v in enumerate(df.index):
        row = [column[idx] for column in data]
        if index:
            if cols:
                v = list(next(cols))
            else:
                v = [v]
            row = v + row
        yield row
Example #2
0
def dataframe_to_rows(df, index=True, header=True):
    """
    Convert a Pandas dataframe into something suitable for passing into a worksheet.

    This is a generator that yields one list per worksheet row.

    If header is True the column headers are yielded first (one row per
    column level); when index is also True each header row starts with one
    empty cell per index level.
    If index is True a row holding the index names is yielded next and every
    data row is prefixed with its index value(s).
    Formatting should be done by client code.
    """
    import numpy
    from pandas import Timestamp

    # ``_data`` is only a deprecated alias of ``_mgr`` in recent pandas
    # releases, so prefer ``_mgr`` and keep ``_data`` as a fallback.
    manager = getattr(df, "_mgr", None)
    if manager is None:
        manager = df._data
    blocks = manager.blocks
    ncols = sum(b.shape[0] for b in blocks)
    data = [None] * ncols

    for b in blocks:
        values = b.values

        if b.dtype.type == numpy.datetime64:
            # hand out pandas Timestamps instead of raw datetime64 values
            values = numpy.array([Timestamp(v) for v in values.ravel()])
            values = values.reshape(b.shape)

        result = values.tolist()

        # store every row of the block at the column position given by mgr_locs
        for col_loc, col in zip(b.mgr_locs, result):
            data[col_loc] = col

    if header:
        if df.columns.nlevels > 1:
            rows = expand_levels(df.columns.levels)
        else:
            rows = [list(df.columns.values)]
        for row in rows:
            row = [
                Timestamp(v) if isinstance(v, numpy.datetime64) else v
                for v in row
            ]
            if index:
                row = [None] * df.index.nlevels + row
            yield row

    cols = None
    if index and df.index.nlevels > 1:
        # the expanded index is only consumed when index cells are emitted
        cols = zip(*expand_levels(df.index.levels))

    if index:
        yield df.index.names

    for idx, v in enumerate(df.index):
        row = [column[idx] for column in data]
        if index:
            if cols:
                v = list(next(cols))
            else:
                v = [v]
            row = v + row
        yield row
Example #3
0
def test_read_fast_integrated_numbers_2(sample_workbook):
    """
    Check that K1:K30 of 'Sheet2 - Numbers' reads back as 0.01, 0.02, ... 0.30.

    zip() stops at the shorter input, so rows missing from the sheet are not
    reported by this check.
    """
    book = sample_workbook
    cell_range = 'K1:K30'
    wanted = [[(n + 1) / 100.0] for n in range(30)]
    sheet = book['Sheet2 - Numbers']
    for cells, wanted_row in zip(sheet.iter_rows(cell_range), wanted):
        assert [cell.value for cell in cells] == wanted_row
Example #4
0
def test_read_fast_integrated_numbers(sample_workbook):
    """
    Check that D1:D30 of 'Sheet2 - Numbers' reads back as 1, 2, ... 30.

    zip() stops at the shorter input, so rows missing from the sheet are not
    reported by this check.
    """
    book = sample_workbook
    wanted = [[n + 1] for n in range(30)]
    cell_range = 'D1:D30'
    sheet = book['Sheet2 - Numbers']
    for cells, wanted_row in zip(sheet.iter_rows(cell_range), wanted):
        assert [cell.value for cell in cells] == wanted_row
Example #5
0
 def test_squared_range(self, Worksheet):
     """get_squared_range(1, 1, 3, 4) should yield the cells A1..C4, row by row."""
     sheet = Worksheet(Workbook())
     # ('A1', 'B1', 'C1') ... ('A4', 'B4', 'C4')
     wanted = [
         tuple('%s%d' % (letter, line) for letter in 'ABC')
         for line in range(1, 5)
     ]
     grid = sheet.get_squared_range(1, 1, 3, 4)
     for cells, coords in zip(grid, wanted):
         assert tuple(cell.coordinate for cell in cells) == coords
 def test_squared_range(self):
     """get_squared_range(1, 1, 3, 4) should yield the cells A1..C4, row by row."""
     sheet = Worksheet(self.wb)
     # ('A1', 'B1', 'C1') ... ('A4', 'B4', 'C4')
     wanted = [
         tuple('%s%d' % (letter, line) for letter in 'ABC')
         for line in range(1, 5)
     ]
     grid = sheet.get_squared_range(1, 1, 3, 4)
     for cells, coords in zip(grid, wanted):
         assert tuple(cell.coordinate for cell in cells) == coords
    def test_iter_rows_offset(self):
        """
        iter_rows('A1:C4', 1, 3) is expected to yield the cells D2..F5,
        i.e. the requested range moved by the two extra arguments.
        """
        sheet = Worksheet(self.wb)
        shifted = sheet.iter_rows('A1:C4', 1, 3)
        # ('D2', 'E2', 'F2') ... ('D5', 'E5', 'F5')
        wanted = [
            tuple(letter + str(line) for letter in 'DEF')
            for line in range(2, 6)
        ]

        for cells, coords in zip(shifted, wanted):
            assert tuple(cell.coordinate for cell in cells) == coords
Example #8
0
    def test_iter_rows_offset(self, Worksheet):
        """
        iter_rows('A1:C4', 1, 3) is expected to yield the cells D2..F5,
        i.e. the requested range moved by the two extra arguments.
        """
        sheet = Worksheet(Workbook())
        shifted = sheet.iter_rows('A1:C4', 1, 3)
        # ('D2', 'E2', 'F2') ... ('D5', 'E5', 'F5')
        wanted = [
            tuple(letter + str(line) for letter in 'DEF')
            for line in range(2, 6)
        ]

        for cells, coords in zip(shifted, wanted):
            assert tuple(cell.coordinate for cell in cells) == coords
Example #9
0
    def test_iter_rows(self, Worksheet):
        """iter_rows('A1:C4') should yield the cells A1..C4, one row at a time."""
        sheet = Worksheet(Workbook())
        # ('A1', 'B1', 'C1') ... ('A4', 'B4', 'C4')
        wanted = [
            tuple('%s%d' % (letter, line) for letter in 'ABC')
            for line in range(1, 5)
        ]

        for cells, coords in zip(sheet.iter_rows('A1:C4'), wanted):
            assert tuple(cell.coordinate for cell in cells) == coords
    def test_iter_rows(self):
        """iter_rows('A1:C4') should yield the cells A1..C4, one row at a time."""
        sheet = Worksheet(self.wb)
        # ('A1', 'B1', 'C1') ... ('A4', 'B4', 'C4')
        wanted = [
            tuple('%s%d' % (letter, line) for letter in 'ABC')
            for line in range(1, 5)
        ]

        for cells, coords in zip(sheet.iter_rows('A1:C4'), wanted):
            assert tuple(cell.coordinate for cell in cells) == coords
Example #11
0
    def test_read_fast_integrated_text(self, sample_workbook):
        """
        Check the values read from 'Sheet1 - Text': a 5 x 7 grid that is empty
        apart from the text in its first and last cell.
        """
        # start from an all-None grid and fill in the two populated corners
        wanted = [[None] * 7 for _ in range(5)]
        wanted[0][0] = 'This is cell A1 in Sheet 1'
        wanted[4][6] = 'This is cell G5'

        sheet = sample_workbook['Sheet1 - Text']
        for cells, wanted_row in zip(sheet.rows, wanted):
            assert [cell.value for cell in cells] == wanted_row
Example #12
0
    def test_append_2d_list(self, Worksheet):
        """Two appended lists should be readable again through iter_rows('A1:B2')."""
        sheet = Worksheet(Workbook())

        wanted = tuple(
            ('This is A%d' % line, 'This is B%d' % line) for line in (1, 2)
        )
        for values in wanted:
            # append() receives a fresh list, as in a literal call
            sheet.append(list(values))

        cells = sheet.iter_rows('A1:B2')
        for want, got in zip(wanted, flatten(cells)):
            assert want == tuple(got)
    def test_append_2d_list(self):
        """Two appended lists should be readable again through iter_rows('A1:B2')."""
        sheet = Worksheet(self.wb)

        wanted = tuple(
            ('This is A%d' % line, 'This is B%d' % line) for line in (1, 2)
        )
        for values in wanted:
            # append() receives a fresh list, as in a literal call
            sheet.append(list(values))

        cells = sheet.iter_rows('A1:B2')
        for want, got in zip(wanted, flatten(cells)):
            assert want == tuple(got)
Example #14
0
def expand_levels(levels, labels):
    """
    Multiindexes need expanding so that subtitles repeat

    Yields one row per (level, labels) pair. Each position in ``labels`` is
    looked up in the level; a position equal to the one before it is emitted
    as None instead of repeating the title.
    """
    for titles, positions in zip(levels, labels):
        expanded = []
        previous = None
        for position in positions:
            if previous != position:
                expanded.append(titles[position])
                previous = position
            else:
                # same title as the preceding cell: leave the cell empty
                expanded.append(None)
        yield expanded
def test_read_external_ranges(datadir):
    """
    Read workbook_external_range.xml and check the name and value of each of
    the four named ranges returned by read_named_ranges().
    """
    datadir.chdir()
    ws = DummyWS("Sheet1")
    wb = DummyWB(ws)
    with open("workbook_external_range.xml") as src:
        xml = src.read()
    named_ranges = list(read_named_ranges(xml, wb))
    assert len(named_ranges) == 4
    expected = [
        ("B1namedrange", "'Sheet1'!$A$1"),
        ("references_external_workbook", "[1]Sheet1!$A$1"),
        ("references_nr_in_ext_wb", "[1]!B2range"),
        ("references_other_named_range", "B1namedrange"),
    ]
    for xlr, target in zip(named_ranges, expected):
        # Compare the pair as a tuple. Written as
        # ``assert xlr.name, xlr.value == target`` the comparison becomes the
        # assert *message* and only the truthiness of the name is checked.
        assert (xlr.name, xlr.value) == target
def test_read_external_ranges(datadir):
    """
    Read workbook_external_range.xml and check the name and value of each of
    the four named ranges returned by read_named_ranges().
    """
    datadir.chdir()
    ws = DummyWS("Sheet1")
    wb = DummyWB(ws)
    with open("workbook_external_range.xml") as src:
        xml = src.read()
    named_ranges = list(read_named_ranges(xml, wb))
    assert len(named_ranges) == 4
    expected = [
        ("B1namedrange", "'Sheet1'!$A$1"),
        ("references_external_workbook", "[1]Sheet1!$A$1"),
        ("references_nr_in_ext_wb", "[1]!B2range"),
        ("references_other_named_range", "B1namedrange"),
    ]
    for xlr, target in zip(named_ranges, expected):
        # Compare the pair as a tuple. Written as
        # ``assert xlr.name, xlr.value == target`` the comparison becomes the
        # assert *message* and only the truthiness of the name is checked.
        assert (xlr.name, xlr.value) == target
Example #17
0
def expand_levels(levels):
    """
    Multiindexes need expanding so that subtitles repeat

    Yields one row per level. Within a row every title is followed by enough
    None cells to span the levels below it, and the whole sequence of titles
    is repeated once for each combination of the levels above it.
    """
    # running product of the level sizes: cells needed down to each level
    spans = list(accumulate((len(titles) for titles in levels), operator.mul))
    total = max(spans)

    for titles, span in zip(levels, spans):
        cell_width = total // span  # how wide a title should be
        copies = span // len(titles)  # how often the level is repeated
        once = []
        for title in titles:
            once.append(title)
            once.extend([None] * (cell_width - 1))
        yield once * copies
Example #18
0
def expand_levels(levels):
    """
    Multiindexes need expanding so that subtitles repeat

    One row is produced per level: each title is padded with None so that it
    spans the levels beneath it, and the padded titles are repeated once for
    every combination of the levels above.
    """
    sizes = [len(level) for level in levels]
    widths = list(accumulate(sizes, operator.mul))
    size = max(widths)

    for level, width in zip(levels, widths):
        # empty cells that follow a title so that it spans its sub-levels
        filler = [None] * (size // width - 1)
        single = [cell for title in level for cell in [title] + filler]
        yield single * (width // len(level))
Example #19
0
def test_write_comments(datadir):
    """
    Compare the XML produced by CommentWriter.write_comments() with
    comments1.xml, after replacing author ids by author names and sorting
    both the comment list and the author list of each document.
    """
    datadir.chdir()
    sheet = _create_ws()[0]
    writer = CommentWriter(sheet)
    content = writer.write_comments()
    with open('comments1.xml') as expected:
        correct = fromstring(expected.read())
        check = fromstring(content)
        # check top-level elements have the same name
        for want, got in zip(correct.getchildren(), check.getchildren()):
            assert want.tag == got.tag

        comment_path = '{%s}commentList' % SHEET_MAIN_NS
        author_path = '{%s}authors' % SHEET_MAIN_NS

        # both documents get exactly the same normalisation
        for tree in (correct, check):
            comments = tree.find(comment_path).getchildren()
            authors = tree.find(author_path).getchildren()

            # replace author ids with author names (before authors are sorted)
            for comment in comments:
                author = authors[int(comment.attrib["authorId"])]
                comment.attrib["authorId"] = author.text

            # sort the comment list
            comments.sort(key=lambda tag: tag.attrib["ref"])
            tree.find(comment_path)[:] = comments

            # sort the author list
            authors.sort(key=lambda tag: tag.text)
            tree.find(author_path)[:] = authors

        diff = compare_xml(get_document_content(correct),
                           get_document_content(check))
        assert diff is None, diff
    def test_cols(self):
        """
        After writing to A1 and C9, ``columns`` should expose three columns of
        nine cells each, with the written values in the first and last cell.
        """
        sheet = Worksheet(self.wb)

        sheet.cell('A1').value = 'first'
        sheet.cell('C9').value = 'last'
        # ('A1', ..., 'A9'), ('B1', ..., 'B9'), ('C1', ..., 'C9')
        wanted = [
            tuple('%s%d' % (letter, line) for line in range(1, 10))
            for letter in 'ABC'
        ]

        columns = sheet.columns
        for column, coords in zip(columns, wanted):
            assert tuple(cell.coordinate for cell in column) == coords

        assert len(columns) == 3

        assert columns[0][0].value == 'first'
        assert columns[-1][-1].value == 'last'
Example #21
0
    def test_cols(self, Worksheet):
        """
        After writing to A1 and C9, ``columns`` should expose three columns of
        nine cells each, with the written values in the first and last cell.
        """
        sheet = Worksheet(Workbook())

        sheet.cell('A1').value = 'first'
        sheet.cell('C9').value = 'last'
        # ('A1', ..., 'A9'), ('B1', ..., 'B9'), ('C1', ..., 'C9')
        wanted = [
            tuple('%s%d' % (letter, line) for line in range(1, 10))
            for letter in 'ABC'
        ]

        columns = sheet.columns
        for column, coords in zip(columns, wanted):
            assert tuple(cell.coordinate for cell in column) == coords

        assert len(columns) == 3

        assert columns[0][0].value == 'first'
        assert columns[-1][-1].value == 'last'
def test_write_comments(datadir):
    """
    Compare the XML produced by CommentWriter.write_comments() with
    comments1.xml, after replacing author ids by author names and sorting
    both the comment list and the author list of each document.
    """
    def normalise(tree):
        # Bring one document into a canonical, order-independent form.
        comments = tree.find('{%s}commentList' % SHEET_MAIN_NS).getchildren()
        authors = tree.find('{%s}authors' % SHEET_MAIN_NS).getchildren()

        # replace author ids with author names (before authors are sorted)
        for node in comments:
            node.attrib["authorId"] = authors[int(node.attrib["authorId"])].text

        # sort the comment list
        comments.sort(key=lambda tag: tag.attrib["ref"])
        tree.find('{%s}commentList' % SHEET_MAIN_NS)[:] = comments

        # sort the author list
        authors.sort(key=lambda tag: tag.text)
        tree.find('{%s}authors' % SHEET_MAIN_NS)[:] = authors

    datadir.chdir()
    sheet = _create_ws()[0]
    content = CommentWriter(sheet).write_comments()
    with open('comments1.xml') as expected:
        correct = fromstring(expected.read())
        check = fromstring(content)
        # check top-level elements have the same name
        for want, got in zip(correct.getchildren(), check.getchildren()):
            assert want.tag == got.tag

        normalise(correct)
        normalise(check)

        diff = compare_xml(get_document_content(correct), get_document_content(check))
        assert diff is None, diff
Example #23
0
 def __defaults__(self):
     """
     Return a dict mapping the parameters of the class's ``__init__`` that
     have a default value to that default.

     An empty dict is returned when ``__init__`` declares no defaults.
     """
     # getargspec() was removed in Python 3.11; use getfullargspec() when it
     # exists and fall back to getargspec() on interpreters that lack it.
     getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
     spec = getspec(self.__class__.__init__)
     defaults = spec.defaults or ()
     if not defaults:
         return {}
     # defaults always belong to the *last* len(defaults) positional parameters
     return dict(zip(spec.args[-len(defaults):], defaults))
Example #24
0
def test_read_fast_integrated_text(sample_workbook):
    """
    Check the cell values of 'Sheet1 - Text' row by row against ``expected``.

    NOTE(review): ``expected`` is not defined inside this function; it is
    presumably a module-level table defined elsewhere in the file -- confirm.
    """
    sheet = sample_workbook['Sheet1 - Text']
    for cells, wanted_row in zip(sheet.rows, expected):
        assert [cell.value for cell in cells] == wanted_row
 def __defaults__(self):
     """
     Return a dict mapping the parameters of the class's ``__init__`` that
     have a default value to that default.

     An empty dict is returned when ``__init__`` declares no defaults.
     """
     # getargspec() was removed in Python 3.11; use getfullargspec() when it
     # exists and fall back to getargspec() on interpreters that lack it.
     getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
     spec = getspec(self.__class__.__init__)
     defaults = spec.defaults or ()
     if not defaults:
         return {}
     # defaults always belong to the *last* len(defaults) positional parameters
     return dict(zip(spec.args[-len(defaults):], defaults))