def dataframe_to_rows(df, index=True, header=True):
    """
    Convert a Pandas dataframe into something suitable for passing into a
    worksheet.

    If index is True then the index will be included, starting one row below
    the header. If header is True then column headers will be included
    starting one column to the right. Formatting should be done by client
    code.

    This is a generator. It yields, in order:

    * the header row(s) when ``header`` is true, each prefixed with one
      ``None`` per index level when ``index`` is true;
    * ``df.index.names`` when ``index`` is true;
    * one list per dataframe row, prefixed with the index value(s) when
      ``index`` is true.
    """
    import numpy
    from pandas import Timestamp

    # ``_data`` is the legacy alias of the internal block manager. Newer
    # pandas exposes it as ``_mgr`` and deprecates the old name, so prefer the
    # new attribute and only fall back when it is missing.
    manager = getattr(df, "_mgr", None)
    if manager is None:
        manager = df._data
    blocks = manager.blocks

    ncols = sum(b.shape[0] for b in blocks)
    data = [None] * ncols

    # Collect the values column by column, at the position each block column
    # occupies in the dataframe.
    for b in blocks:
        values = b.values

        if b.dtype.type == numpy.datetime64:
            values = numpy.array([Timestamp(v) for v in values.ravel()])
            values = values.reshape(b.shape)

        result = values.tolist()

        for col_loc, col in zip(b.mgr_locs, result):
            data[col_loc] = col

    if header:
        if df.columns.nlevels > 1:
            rows = expand_levels(df.columns.levels)
        else:
            rows = [list(df.columns.values)]
        for row in rows:
            # numpy datetimes in the header are converted to Timestamps
            row = [
                Timestamp(v) if isinstance(v, numpy.datetime64) else v
                for v in row
            ]
            if index:
                row = [None] * df.index.nlevels + row
            yield row

    cols = None
    if df.index.nlevels > 1:
        cols = zip(*expand_levels(df.index.levels))

    if index:
        yield df.index.names

    for idx, v in enumerate(df.index):
        row = [data[j][idx] for j in range(ncols)]
        if index:
            if cols:
                v = list(next(cols))
            else:
                v = [v]
            row = v + row
        yield row
def dataframe_to_rows(df, index=True, header=True):
    """
    Convert a Pandas dataframe into something suitable for passing into a
    worksheet.

    If index is True then the index will be included, starting one row below
    the header. If header is True then column headers will be included
    starting one column to the right. Formatting should be done by client
    code.

    The function is a generator of rows: header row(s) first (when
    ``header``), then ``df.index.names`` (when ``index``), then one list per
    dataframe row. With ``index`` true, header rows are padded on the left
    with ``None`` for every index level and data rows start with their index
    value(s).
    """
    import numpy
    from pandas import Timestamp

    # Prefer ``_mgr``: ``_data`` is only the deprecated legacy alias of the
    # block manager and is kept as a fallback for pandas versions without
    # ``_mgr``.
    mgr = getattr(df, "_mgr", None)
    if mgr is None:
        mgr = df._data
    blocks = mgr.blocks

    ncols = sum(b.shape[0] for b in blocks)
    data = [None] * ncols

    for b in blocks:
        values = b.values

        if b.dtype.type == numpy.datetime64:
            # turn raw numpy datetimes into pandas Timestamps, keeping shape
            values = numpy.array([Timestamp(v) for v in values.ravel()])
            values = values.reshape(b.shape)

        result = values.tolist()

        # mgr_locs gives the dataframe column position of each block column
        for col_loc, col in zip(b.mgr_locs, result):
            data[col_loc] = col

    if header:
        if df.columns.nlevels > 1:
            rows = expand_levels(df.columns.levels)
        else:
            rows = [list(df.columns.values)]
        for row in rows:
            n = []
            for v in row:
                if isinstance(v, numpy.datetime64):
                    v = Timestamp(v)
                n.append(v)
            row = n
            if index:
                row = [None] * df.index.nlevels + row
            yield row

    cols = None
    if df.index.nlevels > 1:
        cols = zip(*expand_levels(df.index.levels))

    if index:
        yield df.index.names

    for idx, v in enumerate(df.index):
        row = [data[j][idx] for j in range(ncols)]
        if index:
            if cols:
                v = list(next(cols))
            else:
                v = [v]
            row = v + row
        yield row
def test_read_fast_integrated_numbers_2(sample_workbook):
    """Compare the values read from ``K1:K30`` of 'Sheet2 - Numbers' with
    0.01, 0.02, ... 0.30, one single-cell row at a time."""
    wb = sample_workbook
    query_range = 'K1:K30'
    # one single-cell row per value
    expected = [[(x + 1) / 100.0] for x in range(30)]
    ws = wb['Sheet2 - Numbers']
    for row, expected_row in zip(ws.iter_rows(query_range), expected):
        row_values = [x.value for x in row]
        assert row_values == expected_row
def test_read_fast_integrated_numbers(sample_workbook):
    """Compare the values read from ``D1:D30`` of 'Sheet2 - Numbers' with the
    integers 1..30, one single-cell row at a time."""
    wanted = [[number] for number in range(1, 31)]
    sheet = sample_workbook['Sheet2 - Numbers']
    for cells, wanted_row in zip(sheet.iter_rows('D1:D30'), wanted):
        assert [cell.value for cell in cells] == wanted_row
def test_squared_range(self, Worksheet):
    """Rows from ``get_squared_range(1, 1, 3, 4)`` are compared pairwise with
    the coordinates A1..C1 through A4..C4."""
    sheet = Worksheet(Workbook())
    # ('A1', 'B1', 'C1') ... ('A4', 'B4', 'C4')
    wanted = [
        tuple('%s%d' % (letter, number) for letter in 'ABC')
        for number in range(1, 5)
    ]
    cell_rows = sheet.get_squared_range(1, 1, 3, 4)
    for cells, coordinates in zip(cell_rows, wanted):
        found = tuple(cell.coordinate for cell in cells)
        assert found == coordinates
def test_squared_range(self):
    """Rows from ``get_squared_range(1, 1, 3, 4)`` on a sheet of ``self.wb``
    are compared pairwise with the coordinates of A1:C4."""
    sheet = Worksheet(self.wb)
    wanted_coordinates = [
        ('A1', 'B1', 'C1'),
        ('A2', 'B2', 'C2'),
        ('A3', 'B3', 'C3'),
        ('A4', 'B4', 'C4'),
    ]
    produced = sheet.get_squared_range(1, 1, 3, 4)
    for cells, wanted in zip(produced, wanted_coordinates):
        coordinates = tuple(cell.coordinate for cell in cells)
        assert coordinates == wanted
def test_iter_rows_offset(self):
    """Rows from ``iter_rows('A1:C4', 1, 3)`` are compared pairwise with the
    coordinates of D2:F5."""
    sheet = Worksheet(self.wb)
    shifted_rows = sheet.iter_rows('A1:C4', 1, 3)
    wanted_coordinates = [
        ('D2', 'E2', 'F2'),
        ('D3', 'E3', 'F3'),
        ('D4', 'E4', 'F4'),
        ('D5', 'E5', 'F5'),
    ]
    for cells, wanted in zip(shifted_rows, wanted_coordinates):
        coordinates = tuple(cell.coordinate for cell in cells)
        assert coordinates == wanted
def test_iter_rows_offset(self, Worksheet):
    """Rows from ``iter_rows('A1:C4', 1, 3)`` on a fresh worksheet are
    compared pairwise with the coordinates D2..F2 through D5..F5."""
    sheet = Worksheet(Workbook())
    shifted_rows = sheet.iter_rows('A1:C4', 1, 3)
    # ('D2', 'E2', 'F2') ... ('D5', 'E5', 'F5')
    wanted = [
        tuple('%s%d' % (letter, number) for letter in 'DEF')
        for number in range(2, 6)
    ]
    for cells, coordinates in zip(shifted_rows, wanted):
        assert tuple(cell.coordinate for cell in cells) == coordinates
def test_iter_rows(self, Worksheet):
    """Rows from ``iter_rows('A1:C4')`` on a fresh worksheet are compared
    pairwise with the coordinates A1..C1 through A4..C4."""
    sheet = Worksheet(Workbook())
    # ('A1', 'B1', 'C1') ... ('A4', 'B4', 'C4')
    wanted = [
        tuple('%s%d' % (letter, number) for letter in 'ABC')
        for number in range(1, 5)
    ]
    cell_rows = sheet.iter_rows('A1:C4')
    for cells, coordinates in zip(cell_rows, wanted):
        assert tuple(cell.coordinate for cell in cells) == coordinates
def test_iter_rows(self):
    """Rows from ``iter_rows('A1:C4')`` on a sheet of ``self.wb`` are
    compared pairwise with the coordinates of A1:C4."""
    sheet = Worksheet(self.wb)
    wanted_coordinates = [
        ('A1', 'B1', 'C1'),
        ('A2', 'B2', 'C2'),
        ('A3', 'B3', 'C3'),
        ('A4', 'B4', 'C4'),
    ]
    produced = sheet.iter_rows('A1:C4')
    for cells, wanted in zip(produced, wanted_coordinates):
        coordinates = tuple(cell.coordinate for cell in cells)
        assert coordinates == wanted
def test_read_fast_integrated_text(self, sample_workbook):
    """Compare the cell values of 'Sheet1 - Text' row by row with a 5 x 7
    grid that is empty except for the first and the last cell."""
    width = 7
    first_row = ['This is cell A1 in Sheet 1'] + [None] * (width - 1)
    last_row = [None] * (width - 1) + ['This is cell G5']
    wanted = [first_row] + [[None] * width for _ in range(3)] + [last_row]

    sheet = sample_workbook['Sheet1 - Text']
    for cells, wanted_row in zip(sheet.rows, wanted):
        assert [cell.value for cell in cells] == wanted_row
def test_append_2d_list(self, Worksheet):
    """Append two rows of two strings each, then compare what
    ``flatten(iter_rows('A1:B2'))`` yields with the appended rows."""
    sheet = Worksheet(Workbook())
    first = ['This is A1', 'This is B1']
    second = ['This is A2', 'This is B2']
    sheet.append(first)
    sheet.append(second)

    cell_range = sheet.iter_rows('A1:B2')
    wanted = (tuple(first), tuple(second))
    for wanted_row, found in zip(wanted, flatten(cell_range)):
        assert wanted_row == tuple(found)
def test_append_2d_list(self):
    """Append two rows to a sheet of ``self.wb`` and compare the flattened
    ``iter_rows('A1:B2')`` output with the appended values."""
    sheet = Worksheet(self.wb)
    for appended in (['This is A1', 'This is B1'],
                     ['This is A2', 'This is B2']):
        sheet.append(appended)

    cell_range = sheet.iter_rows('A1:B2')
    wanted = (
        ('This is A1', 'This is B1'),
        ('This is A2', 'This is B2'),
    )
    for wanted_row, found in zip(wanted, flatten(cell_range)):
        assert wanted_row == tuple(found)
def expand_levels(levels, labels):
    """
    Multiindexes need expanding so that subtitles repeat

    For each (level, label codes) pair one row is yielded: every code is
    looked up in the level, except that a code equal to the one just emitted
    becomes ``None`` so a title is only written once per run.
    """
    for names, codes in zip(levels, labels):
        previous = None
        expanded = []
        for code in codes:
            if previous == code:
                # still inside the same run: leave the cell empty
                expanded.append(None)
                continue
            expanded.append(names[code])
            previous = code
        yield expanded
def test_read_external_ranges(datadir):
    """Read the named ranges of ``workbook_external_range.xml`` and check
    their count and their (name, value) pairs."""
    datadir.chdir()
    ws = DummyWS("Sheet1")
    wb = DummyWB(ws)
    with open("workbook_external_range.xml") as src:
        xml = src.read()
    named_ranges = list(read_named_ranges(xml, wb))
    assert len(named_ranges) == 4
    expected = [
        ("B1namedrange", "'Sheet1'!$A$1"),
        ("references_external_workbook", "[1]Sheet1!$A$1"),
        ("references_nr_in_ext_wb", "[1]!B2range"),
        ("references_other_named_range", "B1namedrange"),
    ]
    for xlr, target in zip(named_ranges, expected):
        # Compare the pair as a tuple: ``assert a, b == c`` would only check
        # that ``a`` is truthy and use ``b == c`` as the failure message.
        assert (xlr.name, xlr.value) == target
def expand_levels(levels):
    """
    Multiindexes need expanding so that subtitles repeat

    One row is yielded per level. Every title is followed by enough ``None``
    cells to span the titles of the deeper levels, and the whole sequence is
    repeated once per combination of the shallower levels.
    """
    sizes = [len(level) for level in levels]
    # running product: number of title slots used up to and including a level
    spans = list(accumulate(sizes, operator.mul))
    total = max(spans)

    for level, span in zip(levels, spans):
        cell_width = total // span  # how wide a title should be
        copies = span // len(level)  # how often a title is repeated
        block = []
        for name in level:
            block.append(name)
            block.extend([None] * (cell_width - 1))
        yield block * copies
def expand_levels(levels):
    """
    Multiindexes need expanding so that subtitles repeat

    Yields one row per level in which each title is padded with ``None`` up
    to the width of the levels below it and the padded titles are repeated
    for every combination of the levels above it.
    """
    cumulative = list(accumulate((len(entry) for entry in levels), operator.mul))
    widest = max(cumulative)

    for titles, covered in zip(levels, cumulative):
        title_width = widest // covered  # how wide a title should be
        repetitions = covered // len(titles)  # how often a title is repeated
        filler = [None] * (title_width - 1)
        padded = [cell for title in titles for cell in [title] + filler]
        yield padded * repetitions
def test_write_comments(datadir):
    """Write the comments of the first sheet from ``_create_ws()`` and
    compare the XML with ``comments1.xml``.

    Before comparing, both documents are normalised: author ids are replaced
    by author names and the comment and author lists are sorted, so neither
    ordering nor id numbering affects the result.
    """
    datadir.chdir()
    sheet = _create_ws()[0]
    writer = CommentWriter(sheet)
    written = writer.write_comments()
    with open('comments1.xml') as reference:
        correct = fromstring(reference.read())
        check = fromstring(written)

    # check top-level elements have the same name
    for wanted, found in zip(correct.getchildren(), check.getchildren()):
        assert wanted.tag == found.tag

    comment_list_tag = '{%s}commentList' % SHEET_MAIN_NS
    authors_tag = '{%s}authors' % SHEET_MAIN_NS

    def normalise(root):
        """Make comment and author order independent of how they were written."""
        comments = root.find(comment_list_tag).getchildren()
        authors = root.find(authors_tag).getchildren()

        # replace author ids with author names
        for comment in comments:
            author_index = int(comment.attrib["authorId"])
            comment.attrib["authorId"] = authors[author_index].text

        # sort the comment list
        comments.sort(key=lambda tag: tag.attrib["ref"])
        root.find(comment_list_tag)[:] = comments

        # sort the author list
        authors.sort(key=lambda tag: tag.text)
        root.find(authors_tag)[:] = authors

    normalise(correct)
    normalise(check)

    diff = compare_xml(get_document_content(correct), get_document_content(check))
    assert diff is None, diff
def test_cols(self):
    """Set A1 and C9 on a sheet of ``self.wb`` and check ``ws.columns``: the
    coordinates of each column, the column count and the two values."""
    sheet = Worksheet(self.wb)
    sheet.cell('A1').value = 'first'
    sheet.cell('C9').value = 'last'

    wanted_coordinates = [
        ('A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9'),
        ('B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9'),
        ('C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'),
    ]
    columns = sheet.columns
    for column, wanted in zip(columns, wanted_coordinates):
        coordinates = tuple(cell.coordinate for cell in column)
        assert coordinates == wanted

    assert len(columns) == 3

    top_left = columns[0][0]
    bottom_right = columns[-1][-1]
    assert top_left.value == 'first'
    assert bottom_right.value == 'last'
def test_cols(self, Worksheet):
    """Set A1 and C9 on a fresh worksheet and check ``ws.columns``: the
    coordinates of each column, the column count and the two values."""
    sheet = Worksheet(Workbook())
    for coordinate, text in (('A1', 'first'), ('C9', 'last')):
        sheet.cell(coordinate).value = text

    # ('A1', ..., 'A9'), ('B1', ..., 'B9'), ('C1', ..., 'C9')
    wanted = [
        tuple('%s%d' % (letter, number) for number in range(1, 10))
        for letter in 'ABC'
    ]
    columns = sheet.columns
    for column, coordinates in zip(columns, wanted):
        assert tuple(cell.coordinate for cell in column) == coordinates

    assert len(columns) == 3

    assert columns[0][0].value == 'first'
    assert columns[-1][-1].value == 'last'
def test_write_comments(datadir):
    """Compare the comments XML written for the first sheet from
    ``_create_ws()`` with the reference file ``comments1.xml``.

    Author ids are swapped for author names and both the comment list and
    the author list are sorted in each document before the XML is diffed.
    """
    datadir.chdir()
    sheet = _create_ws()[0]
    generated = CommentWriter(sheet).write_comments()
    with open('comments1.xml') as reference:
        correct = fromstring(reference.read())
        check = fromstring(generated)

    # check top-level elements have the same name
    top_level_pairs = zip(correct.getchildren(), check.getchildren())
    for reference_child, generated_child in top_level_pairs:
        assert reference_child.tag == generated_child.tag

    comments_path = '{%s}commentList' % SHEET_MAIN_NS
    authors_path = '{%s}authors' % SHEET_MAIN_NS

    # the two documents are independent, so each is normalised on its own
    for document in (correct, check):
        comments = document.find(comments_path).getchildren()
        authors = document.find(authors_path).getchildren()

        # replace author ids with author names
        for comment in comments:
            attributes = comment.attrib
            attributes["authorId"] = authors[int(attributes["authorId"])].text

        # sort the comment list
        comments.sort(key=lambda tag: tag.attrib["ref"])
        document.find(comments_path)[:] = comments

        # sort the author list
        authors.sort(key=lambda tag: tag.text)
        document.find(authors_path)[:] = authors

    diff = compare_xml(get_document_content(correct), get_document_content(check))
    assert diff is None, diff
def __defaults__(self):
    """
    Return a dict mapping the names of the ``__init__`` parameters of this
    object's class to their default values.

    Only positional-or-keyword parameters that have a default are included;
    the first parameter (the instance itself) is never included. An
    ``__init__`` without any defaults gives an empty dict.
    """
    # ``getargspec`` is gone from Python 3.11 on; use ``getfullargspec`` where
    # it exists and keep the old function as a fallback.
    getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    spec = getspec(self.__class__.__init__)

    # ``defaults`` is None when no parameter has a default value
    defaults = spec.defaults or ()
    # defaults belong to the *last* parameters, not the first ones
    first = len(spec.args) - len(defaults)
    return dict(
        (name, value)
        for position, (name, value)
        in enumerate(zip(spec.args[first:], defaults), first)
        if position > 0  # position 0 is the instance parameter
    )
def test_read_fast_integrated_text(sample_workbook):
    """Compare the cell values of 'Sheet1 - Text' row by row with ``expected``."""
    # NOTE(review): ``expected`` is not defined in this function; presumably
    # it is provided by an enclosing scope - confirm.
    sheet = sample_workbook['Sheet1 - Text']
    for cells, wanted_row in zip(sheet.rows, expected):
        assert [cell.value for cell in cells] == wanted_row