def test_not_corrupted(self): with self.assertRaises(Exception) as context: excelrd.open_workbook(from_this_dir("corrupted_error.xls")) self.assertTrue("Workbook corruption" in str(context.exception)) excelrd.open_workbook(from_this_dir("corrupted_error.xls"), ignore_workbook_corruption=True)
def test_excel_comments(self): book = open_workbook(from_this_dir("test_comments_excel.xlsx")) sheet = book.sheet_by_index(0) note_map = sheet.cell_note_map self.assertEqual(len(note_map), 1) self.assertEqual(note_map[(0, 1)].text, "hello")
def test_merged_cells(self): book = excelrd.open_workbook(from_this_dir("xf_class.xls"), formatting_info=True) sheet3 = book.sheet_by_name("table2") row_lo, row_hi, col_lo, col_hi = sheet3.merged_cells[0] self.assertEqual(sheet3.cell(row_lo, col_lo).value, "MERGED") self.assertEqual((row_lo, row_hi, col_lo, col_hi), (3, 7, 2, 5))
def test_excel_comments_with_multi_sheets(self): book = open_workbook(from_this_dir("test_comments_excel_sheet2.xlsx")) sheet = book.sheet_by_index(1) note_map = sheet.cell_note_map self.assertEqual(len(note_map), 1) self.assertEqual(note_map[(1, 1)].text, "Note lives here") self.assertEqual(len(book.sheet_by_index(0).cell_note_map), 0)
def test_read_ragged(self): book = excelrd.open_workbook(from_this_dir("ragged.xls"), ragged_rows=True) sheet = book.sheet_by_index(0) self.assertEqual(sheet.row_len(0), 3) self.assertEqual(sheet.row_len(1), 2) self.assertEqual(sheet.row_len(2), 1) self.assertEqual(sheet.row_len(3), 4) self.assertEqual(sheet.row_len(4), 4)
def test_for_github_issue_101(self): # Test for non-Excel file with forward slash file separator # https://github.com/python-excel/excelrd/issues/101 workbook = excelrd.open_workbook(from_this_dir("self_evaluation_report_2014-05-19.xlsx")) worksheet = workbook.sheet_by_index(0) # Test reading sample data from the worksheet. cell = worksheet.cell(0, 0) self.assertEqual(cell.value, "one") self.assertEqual(cell.ctype, excelrd.book.XL_CELL_TEXT)
def test_for_github_issue_96(self): # Test for non-Excel file with forward slash file separator and # lowercase names. https://github.com/python-excel/excelrd/issues/96 workbook = excelrd.open_workbook(from_this_dir("apachepoi_49609.xlsx")) worksheet = workbook.sheet_by_index(0) # Test reading sample data from the worksheet. cell = worksheet.cell(0, 1) self.assertEqual(cell.value, "Cycle") self.assertEqual(cell.ctype, excelrd.book.XL_CELL_TEXT) cell = worksheet.cell(1, 1) self.assertEqual(cell.value, 1) self.assertEqual(cell.ctype, excelrd.book.XL_CELL_NUMBER)
def test_for_github_issue_150(self): # Test for non-Excel file with a non-lowercase worksheet filename. # https://github.com/python-excel/excelrd/issues/150 workbook = excelrd.open_workbook(from_this_dir("issue150.xlsx")) worksheet = workbook.sheet_by_index(0) # Test reading sample data from the worksheet. cell = worksheet.cell(0, 1) self.assertEqual(cell.value, "Cycle") self.assertEqual(cell.ctype, excelrd.book.XL_CELL_TEXT) cell = worksheet.cell(1, 1) self.assertEqual(cell.value, 1) self.assertEqual(cell.ctype, excelrd.book.XL_CELL_NUMBER)
def table_body(self, **kwargs): book = excelrd.open_workbook(kwargs['filename']) sh = book.sheet_by_name(kwargs['sheetname']) if kwargs['rows'] == all: kwargs['rows'] = sh.nrows if kwargs['cols'] == all: kwargs['cols'] = sh.ncols datas = [] for row_idx in range(1, kwargs['rows']): data = sh.row_values(row_idx, 0, kwargs['cols']) datas.append(data) return datas
def test_for_github_issue_75(self): # Test <cell> inlineStr attribute without <si> child. # https://github.com/python-excel/excelrd/issues/75 workbook = excelrd.open_workbook(from_this_dir("apachepoi_52348.xlsx")) worksheet = workbook.sheet_by_index(0) # Test an empty inlineStr cell. cell = worksheet.cell(0, 0) self.assertEqual(cell.value, "") self.assertEqual(cell.ctype, excelrd.book.XL_CELL_EMPTY) # Test a non-empty inlineStr cell. cell = worksheet.cell(1, 2) self.assertEqual(cell.value, "Category") self.assertEqual(cell.ctype, excelrd.book.XL_CELL_TEXT)
def main(): book = excelrd.open_workbook("namesdemo.xls") print("The number of worksheets is {}".format(book.nsheets)) print("Worksheet name(s): {}".format(", ".join(book.sheet_names()))) sh = book.sheet_by_index(2) print("{}: rows={}, cols={}".format(sh.name, sh.nrows, sh.ncols)) for row_idx in range(sh.nrows): for col_idx in range(sh.ncols): cell = sh.cell(row_idx, col_idx) if not cell.value: continue print("row={}, col={}, value={}".format(row_idx, col_idx, cell.value))
def table_head(self, **kwargs): book = excelrd.open_workbook(kwargs['filename']) sh = book.sheet_by_name(kwargs['sheetname']) if kwargs['rows'] == all: kwargs['rows'] = sh.nrows if kwargs['cols'] == all: kwargs['cols'] = sh.ncols datas = [] table_head_1 = ['----'] for row_idx in range(1): for col_idx in range(kwargs['cols']): cell = sh.cell(row_idx, col_idx) if not cell.value: continue data = ("{}".format(cell.value)) datas.append(data) num = kwargs['cols'] table_heads_1 = table_head_1 * num return datas, table_heads_1
def test_merged_cells_xlsx(self): book = excelrd.open_workbook(from_this_dir("merged_cells.xlsx")) sheet1 = book.sheet_by_name("Sheet1") expected = [] got = sheet1.merged_cells self.assertEqual(expected, got) sheet2 = book.sheet_by_name("Sheet2") expected = [(0, 1, 0, 2)] got = sheet2.merged_cells self.assertEqual(expected, got) sheet3 = book.sheet_by_name("Sheet3") expected = [(0, 1, 0, 2), (0, 1, 2, 4), (1, 4, 0, 2), (1, 9, 2, 4)] got = sheet3.merged_cells self.assertEqual(expected, got) sheet4 = book.sheet_by_name("Sheet4") expected = [(0, 1, 0, 2), (2, 20, 0, 1), (1, 6, 2, 5)] got = sheet4.merged_cells self.assertEqual(expected, got)
Revenue -1 0 checks if "Revenue" exists in global scope """ sys.stdout.write(text) if len(sys.argv) != 5: usage() sys.exit(0) arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls" arg_name = sys.argv[2] # see below arg_scope = sys.argv[3] # see below # 0: no show, # 1: only non-empty cells, # 2: all cells arg_show_contents = int(sys.argv[4]) for fname in glob.glob(arg_pattern): book = excelrd.open_workbook(fname) if arg_name == "*": # Examine book.name_obj_list to find all names # in a given scope ("*" => all scopes) do_scope_query(book, arg_scope, arg_show_contents) elif arg_scope == "*": # Using book.name_map to find all usage of a name. show_name_details(book, arg_name, arg_show_contents) else: # Using book.name_and_scope_map to find which if any instances # of a name are visible in the given scope, which can be supplied # as -1 (global) or a sheet number or a sheet name. show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
def setUp(self): self.book = open_workbook( from_this_dir("sharedstrings_alt_location.xlsx"))
def setUp(self): self.book = excelrd.open_workbook(from_this_dir("Formate.xls"), formatting_info=True) self.sheet = self.book.sheet_by_name("Blätt1")
def test_xlsx_lower_case_cellnames(self): # Check if it opens with lower cell names open_workbook(from_this_dir("test_lower_case_cellnames.xlsx"))
def test_err_cell_empty(self): # For cell with type "e" (error) but without inner 'val' tags open_workbook(from_this_dir("err_cell_empty.xlsx"))
def test_xlsx(self): # For now, we just check this doesn't raise an error. open_workbook(from_this_dir("reveng1.xlsx"))
def test_excel_comments_no_t_elements(self): book = open_workbook(from_this_dir("test_comments_excel.xlsx")) sheet = book.sheet_by_index(3) note_map = sheet.cell_note_map self.assertEqual(note_map[(0, 0)].text, "")
def test_BYTES_X00(self): # For now, we just check this doesn't raise an error. open_workbook(from_this_dir("picture_in_cell.xls"), formatting_info=True)
def test_ragged_rows_tidied_with_formatting(self): # For now, we just check this doesn't raise an error. open_workbook(from_this_dir("issue20.xls"), formatting_info=True)
def test_tilde_path_expansion(self): with tempfile.NamedTemporaryFile(suffix=".xlsx", dir=os.path.expanduser("~")) as fp: shutil.copyfile(from_this_dir("text_bar.xlsx"), fp.name) # For now, we just check this doesn't raise an error. open_workbook(os.path.join("~", os.path.basename(fp.name)))
def test_names_demo(self): # For now, we just check this doesn't raise an error. open_workbook( from_this_dir(os.path.join("..", "examples", "namesdemo.xls")), )
def main(cmd_args): import optparse global options usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc oparser = optparse.OptionParser(usage) oparser.add_option("-l", "--logfilename", default="", help="contains error messages") oparser.add_option( "-v", "--verbosity", type="int", default=0, help="level of information and diagnostics provided", ) oparser.add_option( "-m", "--mmap", type="int", default=-1, help="1: use mmap; 0: don't use mmap; -1: accept heuristic", ) oparser.add_option("-e", "--encoding", default="", help="encoding override") oparser.add_option( "-f", "--formatting", type="int", default=0, help="0 (default): no fmt info\n" "1: fmt info (all cells)\n", ) oparser.add_option( "-g", "--gc", type="int", default=0, help= "0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc", ) oparser.add_option( "-s", "--onesheet", default="", help="restrict output to this sheet (name or index)") oparser.add_option( "-u", "--unnumbered", action="store_true", default=0, help="omit line numbers or offsets in biff_dump", ) oparser.add_option( "-d", "--on-demand", action="store_true", default=0, help="load sheets on demand instead of all at once", ) oparser.add_option( "-t", "--suppress-timing", action="store_true", default=0, help="don't print timings (diffs are less messy)", ) oparser.add_option( "-r", "--ragged-rows", action="store_true", default=0, help="open_workbook(..., ragged_rows=True)", ) options, args = oparser.parse_args(cmd_args) if len(args) == 1 and args[0] in ("version", ): pass elif len(args) < 2: oparser.error("Expected at least 2 args, found %d" % len(args)) cmd = args[0] xlrd_version = getattr(excelrd, "__VERSION__", "unknown; before 0.5") if cmd == "biff_dump": excelrd.dump(args[1], unnumbered=options.unnumbered) sys.exit(0) if cmd == "biff_count": excelrd.count_records(args[1]) sys.exit(0) if cmd == "version": print("excelrd: {}, from {}".format(xlrd_version, excelrd.__file__)) print("Python:", sys.version) sys.exit(0) if options.logfilename: logfile = LogHandler(open(options.logfilename, "w")) else: logfile = sys.stdout mmap_opt = options.mmap mmap_arg = excelrd.USE_MMAP if mmap_opt in (1, 0): mmap_arg = mmap_opt elif mmap_opt != -1: print("Unexpected value (%r) for mmap option -- assuming default" % mmap_opt) fmt_opt = options.formatting | (cmd in ("xfc", )) gc_mode = options.gc if gc_mode: gc.disable() for pattern in args[1:]: for fname in glob.glob(pattern): print("\n=== File: %s ===" % fname) if logfile != sys.stdout: logfile.setfileheading("\n=== File: %s ===\n" % fname) if gc_mode == 1: n_unreachable = gc.collect() if n_unreachable: print("GC before open:", n_unreachable, "unreachable objects") try: t0 = time.time() bk = excelrd.open_workbook( fname, verbosity=options.verbosity, logfile=logfile, use_mmap=mmap_arg, encoding_override=options.encoding, formatting_info=fmt_opt, on_demand=options.on_demand, ragged_rows=options.ragged_rows, ) t1 = time.time() if not options.suppress_timing: print("Open took {:.2f} seconds".format(t1 - t0)) except excelrd.XLRDError as e: print("*** Open failed: {}: {}".format( type(e).__name__, e)) continue except KeyboardInterrupt: print("*** KeyboardInterrupt ***") traceback.print_exc(file=sys.stdout) sys.exit(1) except BaseException as e: print("*** Open failed: {}: {}".format( type(e).__name__, e)) traceback.print_exc(file=sys.stdout) continue t0 = time.time() if cmd == "hdr": bk_header(bk) elif cmd == "ov": # OverView show(bk, 0) elif cmd == "show": # all rows show(bk) elif cmd == "2rows": # first row and last row show(bk, 2) elif cmd == "3rows": # first row, 2nd row and last row show(bk, 3) elif cmd == "bench": show(bk, printit=0) elif cmd == "fonts": bk_header(bk) show_fonts(bk) elif cmd == "names": # named reference list show_names(bk) elif cmd == "name_dump": # named reference list show_names(bk, dump=1) elif cmd == "labels": show_labels(bk) elif cmd == "xfc": count_xfs(bk) else: print("*** Unknown command <%s>" % cmd) sys.exit(1) del bk if gc_mode == 1: n_unreachable = gc.collect() if n_unreachable: print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects") if not options.suppress_timing: t1 = time.time() print("\ncommand took {:.2f} seconds\n".format(t1 - t0)) return None
def test_excel_comments_multiline(self): book = open_workbook(from_this_dir("test_comments_excel.xlsx")) sheet = book.sheet_by_index(1) note_map = sheet.cell_note_map self.assertEqual(note_map[(1, 2)].text, "1st line\n2nd line")
def setUp(self): path = from_this_dir("biff4_no_format_no_window2.xls") self.book = open_workbook(path) self.sheet = self.book.sheet_by_index(0)
def test_xlsx_simple(self): # For now, we just check this doesn't raise an error. open_workbook(from_this_dir("text_bar.xlsx"))
def setUp(self): book = excelrd.open_workbook(from_this_dir("formula_test_names.xls")) self.sheet = book.sheet_by_index(0)
def setUp(self): self.book = excelrd.open_workbook(from_this_dir("profiles.xls"), formatting_info=True) self.sheet = self.book.sheet_by_name("PROFILEDEF")