def test_not_corrupted(self):
    """Check that a corrupted workbook fails by default and opens when told to ignore it."""
    # Default behaviour: opening the corrupted file raises.
    with self.assertRaises(Exception) as context:
        xlrd2.open_workbook(from_this_dir('corrupted_error.xls'))
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn('Workbook corruption', str(context.exception))

    # With the opt-out flag the same file must open without raising.
    xlrd2.open_workbook(from_this_dir('corrupted_error.xls'),
                        ignore_workbook_corruption=True)
def test_merged_cells(self):
    """Check the anchor text and bounds of the first merged range on sheet 'table2'."""
    workbook = xlrd2.open_workbook(from_this_dir('xf_class.xls'),
                                   formatting_info=True)
    table2 = workbook.sheet_by_name('table2')
    first_range = table2.merged_cells[0]
    row_lo, row_hi, col_lo, col_hi = first_range

    # The top-left cell of the merged range carries the value.
    anchor = table2.cell(row_lo, col_lo)
    self.assertEqual(anchor.value, 'MERGED')
    self.assertEqual((row_lo, row_hi, col_lo, col_hi), (3, 7, 2, 5))
def test_excel_comments(self):
    """Check that the first sheet exposes exactly one cell note, at (0, 1), reading 'hello'."""
    workbook = open_workbook(from_this_dir('test_comments_excel.xlsx'))
    notes = workbook.sheet_by_index(0).cell_note_map
    self.assertEqual(len(notes), 1)
    self.assertEqual(notes[(0, 1)].text, 'hello')
def ReadExel(path, name='', sheet_name=''):
    """Read every row of one worksheet into a list of row-value lists.

    Args:
        path: Full path of the workbook, or a prefix when ``name`` is given.
        name: Optional file name appended verbatim to ``path`` (no path
            separator is inserted).
        sheet_name: Name of the sheet to read; the first sheet is used when
            this is empty.

    Returns:
        A list holding one ``row_values`` list per row on success, ``1`` when
        the workbook contains no sheets, or ``2`` when ``full_path`` is not an
        existing file.
    """
    full_path = path if name == '' else path + name
    print("准备读取:" + full_path)

    if not os.path.isfile(full_path):
        print("文件不存在,导入失败")
        return 2

    data = xlrd2.open_workbook(full_path)

    # Check for sheets *before* selecting one; selecting first would raise on
    # an empty workbook and the "no data" result could never be returned.
    if not data.sheet_names():
        print('文件中无数据')
        return 1

    if sheet_name != '':
        table = data.sheet_by_name(sheet_name)
    else:
        # Default to the first sheet via the explicit accessor.
        table = data.sheet_by_index(0)

    res = [table.row_values(row) for row in range(table.nrows)]
    print(full_path + "读取完毕")
    return res
def load_excel_xlrd(data):
    """Parse raw Excel file contents with the xlrd library.

    @param data (str) The Excel file contents.

    @return The object returned by xlrd.open_workbook() on success. Returns
    None when the data is not an Office 97 file or when parsing raises.
    """

    # xlrd is only attempted on Office 97 files.
    is_office97 = filetype.is_office97_file(data, True)
    if not is_office97:
        log.warning("File is not an Excel 97 file. Not reading with xlrd2.")
        return None

    # Office 97 confirmed: try the parse, treating any failure as "no result".
    try:
        if log.getEffectiveLevel() == logging.DEBUG:
            log.debug("Trying to load with xlrd...")
        workbook = xlrd.open_workbook(file_contents=data)
    except Exception as e:
        log.error("Reading in file as Excel with xlrd failed. " + str(e))
        return None
    return workbook
def test_excel_comments_with_multi_sheets(self):
    """Check that the note belongs to the second sheet only; the first sheet has none."""
    workbook = open_workbook(from_this_dir('test_comments_excel_sheet2.xlsx'))

    second_sheet_notes = workbook.sheet_by_index(1).cell_note_map
    self.assertEqual(len(second_sheet_notes), 1)
    self.assertEqual(second_sheet_notes[(1, 1)].text, 'Note lives here')

    first_sheet_notes = workbook.sheet_by_index(0).cell_note_map
    self.assertEqual(len(first_sheet_notes), 0)
def __init__(self, xls_doc_path):
    """Open the workbook at ``xls_doc_path`` and initialise default state.

    Args:
        xls_doc_path: Path handed to ``xlrd2.open_workbook``; the workbook is
            opened with ``formatting_info=True``.
    """
    self.xls_workbook = xlrd2.open_workbook(xls_doc_path, formatting_info=True)
    self._macrosheets = None
    self._defined_names = None
    # Bracket and list-separator characters keyed by XlApplicationInternational.
    self.xl_international_flags = {
        XlApplicationInternational.xlLeftBracket: '[',
        XlApplicationInternational.xlListSeparator: ',',
        XlApplicationInternational.xlRightBracket: ']',
    }
def test_for_github_issue_101(self):
    """Regression test for https://github.com/python-excel/xlrd/issues/101.

    Covers a non-Excel file that uses a forward slash as file separator.
    """
    workbook_path = from_this_dir('self_evaluation_report_2014-05-19.xlsx')
    first_sheet = xlrd2.open_workbook(workbook_path).sheet_by_index(0)

    # Read one sample cell from the worksheet.
    top_left = first_sheet.cell(0, 0)
    self.assertEqual(top_left.value, 'one')
    self.assertEqual(top_left.ctype, xlrd2.book.XL_CELL_TEXT)
def get_xls():
    """Return every row of 'Sheet1' from the test-data workbook as a list of row-value lists."""
    # Location of the workbook, relative to the project root.
    workbook_path = (get_file_path.get_root_path()
                     + '/zhoudaoan/api_auto_test-master/testdata/testdata.xlsx')
    sheet = xlrd2.open_workbook(workbook_path).sheet_by_name('Sheet1')
    return [sheet.row_values(row_idx) for row_idx in range(sheet.nrows)]
def __init__(self, excel_path, sheetName):
    """Open a workbook and cache the named sheet, its first row and its dimensions.

    Args:
        excel_path: Path passed to ``xlrd2.open_workbook``.
        sheetName: Name of the worksheet to load.
    """
    workbook = xlrd2.open_workbook(excel_path)
    worksheet = workbook.sheet_by_name(sheetName)

    self.data = workbook
    self.table = worksheet
    # Values of the first row, used as keys.
    self.keys = worksheet.row_values(0)
    # Total number of rows and columns in the sheet.
    self.rowNums = worksheet.nrows
    self.colNums = worksheet.ncols
def __init__(self, xls_doc_path):
    """Open the workbook at ``xls_doc_path`` and initialise default state.

    Besides the workbook handle this sets up the international-flag table and
    a compiled regular expression that matches control characters.

    Args:
        xls_doc_path: Path handed to ``xlrd2.open_workbook``; the workbook is
            opened with ``formatting_info=True``.
    """
    self.xls_workbook = xlrd2.open_workbook(xls_doc_path, formatting_info=True)
    self._macrosheets = None
    self._defined_names = None
    # Bracket and list-separator characters keyed by XlApplicationInternational.
    self.xl_international_flags = {
        XlApplicationInternational.xlLeftBracket: '[',
        XlApplicationInternational.xlListSeparator: ',',
        XlApplicationInternational.xlRightBracket: ']',
    }

    # Character class covering code points 0-31 and 127-159 plus a fixed list
    # of additional code points.
    control_chars = ''.join(map(chr, range(0, 32)))
    control_chars += ''.join(map(chr, range(127, 160)))
    control_chars += '\ufefe\uffff\ufeff\ufffe\uffef\ufff0\ufff1\ufff6\ufefd\udddd\ufffd'
    self._control_char_re = re.compile('[%s]' % re.escape(control_chars))
def test_for_github_issue_150(self):
    """Regression test for https://github.com/python-excel/xlrd/issues/150.

    Covers a non-Excel file with a non-lowercase worksheet filename.
    """
    workbook = xlrd2.open_workbook(from_this_dir('issue150.xlsx'))
    worksheet = workbook.sheet_by_index(0)

    # Sample data: (row, column), expected value, expected cell type.
    samples = (
        ((0, 1), 'Cycle', xlrd2.book.XL_CELL_TEXT),
        ((1, 1), 1, xlrd2.book.XL_CELL_NUMBER),
    )
    for (rowx, colx), expected_value, expected_ctype in samples:
        cell = worksheet.cell(rowx, colx)
        self.assertEqual(cell.value, expected_value)
        self.assertEqual(cell.ctype, expected_ctype)
def test_for_github_issue_96(self):
    """Regression test for https://github.com/python-excel/xlrd/issues/96.

    Covers a non-Excel file with forward slash file separator and lowercase
    names.
    """
    workbook = xlrd2.open_workbook(from_this_dir('apachepoi_49609.xlsx'))
    worksheet = workbook.sheet_by_index(0)

    # Sample data: (row, column), expected value, expected cell type.
    samples = (
        ((0, 1), 'Cycle', xlrd2.book.XL_CELL_TEXT),
        ((1, 1), 1, xlrd2.book.XL_CELL_NUMBER),
    )
    for (rowx, colx), expected_value, expected_ctype in samples:
        cell = worksheet.cell(rowx, colx)
        self.assertEqual(cell.value, expected_value)
        self.assertEqual(cell.ctype, expected_ctype)
def test_for_github_issue_75(self):
    """Regression test for https://github.com/python-excel/xlrd/issues/75.

    Covers the <cell> inlineStr attribute without an <si> child.
    """
    worksheet = xlrd2.open_workbook(
        from_this_dir('apachepoi_52348.xlsx')).sheet_by_index(0)

    # An empty inlineStr cell.
    empty_cell = worksheet.cell(0, 0)
    self.assertEqual(empty_cell.value, '')
    self.assertEqual(empty_cell.ctype, xlrd2.book.XL_CELL_EMPTY)

    # A non-empty inlineStr cell.
    text_cell = worksheet.cell(1, 2)
    self.assertEqual(text_cell.value, 'Category')
    self.assertEqual(text_cell.ctype, xlrd2.book.XL_CELL_TEXT)
def get_books_list2():
    """Queue one download object per row of the root Excel sheet, then persist the queue.

    Each row is read from sheet ROOT_SHEET of workbook ROOT_EXCEL; its third
    column is used both for the save path and for the download URL.
    """
    sheet = xlrd2.open_workbook(ROOT_EXCEL).sheet_by_name(ROOT_SHEET)
    for row_idx in range(sheet.nrows):
        # Row layout: simplified-Chinese file name | traditional-Chinese file name | site directory
        row = sheet.row_values(row_idx)
        # Save path, with special characters replaced.
        save_dir = (ROOT_DIR + row[2]).replace('?', '.')
        # Download URL.
        down_url = get_down_url(row[2])
        print("从Excel文件读取:%s" % (save_dir))
        GLOBAL_DOWN_LIST.insert(0, C_DownFiles(save_dir, down_url))
    # Write the updated download objects to the local file.
    # NOTE(review): assumed to run once after all rows are queued — confirm.
    save_all_objects()
def test_merged_cells_xlsx(self):
    """Check the merged-cell ranges reported for each sheet of merged_cells.xlsx."""
    book = xlrd2.open_workbook(from_this_dir('merged_cells.xlsx'))

    # Sheet name -> expected merged ranges, checked in this order.
    expected_by_sheet = (
        ('Sheet1', []),
        ('Sheet2', [(0, 1, 0, 2)]),
        ('Sheet3', [(0, 1, 0, 2), (0, 1, 2, 4), (1, 4, 0, 2), (1, 9, 2, 4)]),
        ('Sheet4', [(0, 1, 0, 2), (2, 20, 0, 1), (1, 6, 2, 5)]),
    )
    for sheet_name, expected in expected_by_sheet:
        got = book.sheet_by_name(sheet_name).merged_cells
        self.assertEqual(expected, got)
def ExcelToSQL(file):
    """Build one UPDATE statement per data row of the first sheet and write them out.

    The first row supplies the column names; every later row becomes
    ``update <SQLFrom> set <col>=<val>, ... where <id clause>``, where each
    fragment is produced by ``SetVa``. Nothing happens when ``file`` is None.
    """
    # Nothing to do without a file.
    if file is None:
        return

    data = xlrd2.open_workbook(file)
    # Work on the first sheet of the workbook.
    first_sheet_name = data.sheet_names()[0]
    table = data.sheet_by_name(first_sheet_name)
    row_count = table.nrows  # number of rows
    col_count = table.ncols  # number of columns
    print("You select " + str(file) + "中的" + str(table))
    print("The total rows is " + str(table.nrows))
    print("The total cols is " + str(table.ncols))

    statements = []
    # Row 0 is the header, so data rows start at 1.
    for r in range(1, row_count):
        assignments = ', '.join(
            SetVa(table.cell(0, c).value, table.cell(r, c).value)
            for c in range(col_count))
        statement = ('update ' + SQLFrom + ' set ' + assignments
                     + " where " + SetVa("id", table.cell(r, 0).value))
        statements.append(statement)
        print(statement)

    # Write the SQL statements to a txt file.
    WriteTxt("sql", str(workpath), statements)
def read_excel():
    """Print summary information and sample values from 'Sheet1' of the test-data workbook."""
    # Location of the workbook.
    file_path = get_file_path.get_root_path() + 'testdata\\testdata.xlsx'
    excel_file = xlrd2.open_workbook(file_path)
    # A sheet can be fetched by index or by name; the name is used here.
    sheet = excel_file.sheet_by_name('Sheet1')

    # Sheet name, row count, column count.
    print(sheet.name)
    print(sheet.nrows)
    print(sheet.ncols)

    # Values of one whole row and one whole column.
    rows = sheet.row_values(1)
    cols = sheet.col_values(1)
    print(rows)
    print(cols)

    # print() does not interpolate "%s" on its own; format explicitly so the
    # value replaces the placeholder instead of being printed after it.
    print("第二行第一列的值为: %s" % (sheet.cell(1, 0),))
    print("单元格内容格式为: %s" % (sheet.cell(0, 0).ctype,))
def test_tilde_path_expansion(self):
    """Check that open_workbook accepts a '~'-prefixed path to a file in the home directory."""
    home_dir = os.path.expanduser('~')
    with tempfile.NamedTemporaryFile(suffix='.xlsx', dir=home_dir) as fp:
        shutil.copyfile(from_this_dir('text_bar.xlsx'), fp.name)
        tilde_path = os.path.join('~', os.path.basename(fp.name))
        # For now, we just check this doesn't raise an error.
        open_workbook(tilde_path)
def init_xlsx():
    """Open the fixed workbook 'novosibirsk.e2e4online.ru.xlsx' and return the book object.

    NOTE(review): the book is returned after its ``with`` block has exited —
    confirm callers do not rely on resources released at that point.
    """
    workbook_path = "novosibirsk.e2e4online.ru.xlsx"
    with xlrd2.open_workbook(workbook_path) as book:
        opened = book
    return opened
def main(cmd_args):
    """Parse command-line arguments and run the requested command on each matching file.

    ``cmd_args`` is handed to ``optparse``; the first positional argument is
    the command, the remaining ones are glob patterns of input files. The
    commands ``biff_dump``, ``biff_count`` and ``version`` are handled
    immediately and exit the process. Every other command opens each matching
    workbook with ``xlrd2.open_workbook`` and dispatches to a display helper.

    The parsed options are stored in the module-level global ``options``.
    Returns None.
    """
    import optparse
    global options
    usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc
    oparser = optparse.OptionParser(usage)
    oparser.add_option("-l", "--logfilename", default="", help="contains error messages")
    oparser.add_option(
        "-v", "--verbosity", type="int", default=0,
        help="level of information and diagnostics provided")
    oparser.add_option(
        "-m", "--mmap", type="int", default=-1,
        help="1: use mmap; 0: don't use mmap; -1: accept heuristic")
    oparser.add_option("-e", "--encoding", default="", help="encoding override")
    oparser.add_option(
        "-f", "--formatting", type="int", default=0,
        help="0 (default): no fmt info\n" "1: fmt info (all cells)\n", )
    oparser.add_option(
        "-g", "--gc", type="int", default=0,
        help= "0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc" )
    oparser.add_option(
        "-s", "--onesheet", default="",
        help="restrict output to this sheet (name or index)")
    oparser.add_option("-u", "--unnumbered", action="store_true", default=0, help="omit line numbers or offsets in biff_dump")
    oparser.add_option("-d", "--on-demand", action="store_true", default=0, help="load sheets on demand instead of all at once")
    oparser.add_option("-t", "--suppress-timing", action="store_true", default=0, help="don't print timings (diffs are less messy)")
    oparser.add_option("-r", "--ragged-rows", action="store_true", default=0, help="open_workbook(..., ragged_rows=True)")
    options, args = oparser.parse_args(cmd_args)
    # "version" is the only command accepted without an input-file argument.
    if len(args) == 1 and args[0] in ("version", ):
        pass
    elif len(args) < 2:
        oparser.error("Expected at least 2 args, found %d" % len(args))
    cmd = args[0]
    xlrd_version = getattr(xlrd2, "__VERSION__", "unknown; before 0.5")
    # These three commands never open a workbook object; each exits when done.
    if cmd == 'biff_dump':
        xlrd2.dump(args[1], unnumbered=options.unnumbered)
        sys.exit(0)
    if cmd == 'biff_count':
        xlrd2.count_records(args[1])
        sys.exit(0)
    if cmd == 'version':
        print("xlrd2: %s, from %s" % (xlrd_version, xlrd2.__file__))
        print("Python:", sys.version)
        sys.exit(0)
    # Log output goes to the named file when one is given, otherwise to stdout.
    if options.logfilename:
        logfile = LogHandler(open(options.logfilename, 'w'))
    else:
        logfile = sys.stdout
    # Only 0 and 1 override the library default; -1 silently keeps it and any
    # other value keeps it with a warning.
    mmap_opt = options.mmap
    mmap_arg = xlrd2.USE_MMAP
    if mmap_opt in (1, 0):
        mmap_arg = mmap_opt
    elif mmap_opt != -1:
        print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt)
    # The 'xfc' command forces formatting info on regardless of the -f option.
    fmt_opt = options.formatting | (cmd in ('xfc', ))
    # Any non-zero gc mode disables automatic collection; mode 1 additionally
    # collects manually around each file (see below).
    gc_mode = options.gc
    if gc_mode:
        gc.disable()
    for pattern in args[1:]:
        for fname in glob.glob(pattern):
            print("\n=== File: %s ===" % fname)
            if logfile != sys.stdout:
                logfile.setfileheading("\n=== File: %s ===\n" % fname)
            if gc_mode == 1:
                n_unreachable = gc.collect()
                if n_unreachable:
                    print("GC before open:", n_unreachable, "unreachable objects")
            # Open the workbook; a failure skips this file, except for
            # KeyboardInterrupt, which terminates the run.
            try:
                t0 = time.time()
                bk = xlrd2.open_workbook(
                    fname,
                    verbosity=options.verbosity, logfile=logfile,
                    use_mmap=mmap_arg,
                    encoding_override=options.encoding,
                    formatting_info=fmt_opt,
                    on_demand=options.on_demand,
                    ragged_rows=options.ragged_rows,
                )
                t1 = time.time()
                if not options.suppress_timing:
                    print("Open took %.2f seconds" % (t1 - t0, ))
            except xlrd2.XLRDError as e:
                print("*** Open failed: %s: %s" % (type(e).__name__, e))
                continue
            except KeyboardInterrupt:
                print("*** KeyboardInterrupt ***")
                traceback.print_exc(file=sys.stdout)
                sys.exit(1)
            except BaseException as e:
                print("*** Open failed: %s: %s" % (type(e).__name__, e))
                traceback.print_exc(file=sys.stdout)
                continue
            # Restart the timer so the command time excludes the open time.
            t0 = time.time()
            if cmd == 'hdr':
                bk_header(bk)
            elif cmd == 'ov':  # OverView
                show(bk, 0)
            elif cmd == 'show':  # all rows
                show(bk)
            elif cmd == '2rows':  # first row and last row
                show(bk, 2)
            elif cmd == '3rows':  # first row, 2nd row and last row
                show(bk, 3)
            elif cmd == 'bench':
                show(bk, printit=0)
            elif cmd == 'fonts':
                bk_header(bk)
                show_fonts(bk)
            elif cmd == 'names':  # named reference list
                show_names(bk)
            elif cmd == 'name_dump':  # named reference list
                show_names(bk, dump=1)
            elif cmd == 'labels':
                show_labels(bk)
            elif cmd == 'xfc':
                count_xfs(bk)
            else:
                print("*** Unknown command <%s>" % cmd)
                sys.exit(1)
            # Drop the workbook reference before the optional manual collection.
            del bk
            if gc_mode == 1:
                n_unreachable = gc.collect()
                if n_unreachable:
                    print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects")
            if not options.suppress_timing:
                t1 = time.time()
                print("\ncommand took %.2f seconds\n" % (t1 - t0, ))
    return None
def test_names_demo(self):
    """Smoke test: opening examples/namesdemo.xls must not raise."""
    relative_path = os.path.join('..', 'examples', 'namesdemo.xls')
    # For now, we just check this doesn't raise an error.
    open_workbook(from_this_dir(relative_path))
def test_xlsx_simple(self):
    """Smoke test: opening text_bar.xlsx must not raise."""
    workbook_path = from_this_dir('text_bar.xlsx')
    # For now, we just check this doesn't raise an error.
    open_workbook(workbook_path)
def test_xlsx(self):
    """Smoke test: opening reveng1.xlsx must not raise."""
    workbook_path = from_this_dir('reveng1.xlsx')
    # For now, we just check this doesn't raise an error.
    open_workbook(workbook_path)
def test_xlsx_lower_case_cellnames(self):
    """Smoke test: a workbook with lower-case cell names must open without raising."""
    workbook_path = from_this_dir('test_lower_case_cellnames.xlsx')
    open_workbook(workbook_path)
def test_err_cell_empty(self):
    """Smoke test: a cell of type "e" (error) without inner 'val' tags must not break opening."""
    workbook_path = from_this_dir('err_cell_empty.xlsx')
    open_workbook(workbook_path)
Revenue -1 0 checks if "Revenue" exists in global scope """ sys.stdout.write(text) if len(sys.argv) != 5: usage() sys.exit(0) arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls" arg_name = sys.argv[2] # see below arg_scope = sys.argv[3] # see below # 0: no show, # 1: only non-empty cells, # 2: all cells arg_show_contents = int(sys.argv[4]) for fname in glob.glob(arg_pattern): book = xlrd2.open_workbook(fname) if arg_name == "*": # Examine book.name_obj_list to find all names # in a given scope ("*" => all scopes) do_scope_query(book, arg_scope, arg_show_contents) elif arg_scope == "*": # Using book.name_map to find all usage of a name. show_name_details(book, arg_name, arg_show_contents) else: # Using book.name_and_scope_map to find which if any instances # of a name are visible in the given scope, which can be supplied # as -1 (global) or a sheet number or a sheet name. show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
def test_ragged_rows_tidied_with_formatting(self):
    """Smoke test: opening issue20.xls with formatting info must not raise."""
    workbook_path = from_this_dir('issue20.xls')
    # For now, we just check this doesn't raise an error.
    open_workbook(workbook_path, formatting_info=True)
import xlrd2
import sys

# Given a workbook path as first argument, print the name of every macro sheet
# followed by the position, formula and value of each used cell that carries a
# non-empty formula.
if len(sys.argv) > 1:
    path = sys.argv[1]
    xl_workbook = xlrd2.open_workbook(path, formatting_info=True)
    defined_names = xl_workbook.name_map

    for sheet in xl_workbook.sheets():
        # Only macro sheets are reported.
        if sheet.boundsheet_type != xlrd2.biffh.XL_MACROSHEET:
            continue
        print(sheet.name)
        for cell in sheet.get_used_cells():
            # Skip cells with no formula or an empty one.
            if cell.formula is None or len(cell.formula) == 0:
                continue
            print("({},{}):\t{},\t{}".format(
                cell.row, cell.column, cell.formula, cell.value))
def test_BYTES_X00(self):
    """Smoke test: opening picture_in_cell.xls with formatting info must not raise."""
    workbook_path = from_this_dir('picture_in_cell.xls')
    # For now, we just check this doesn't raise an error.
    open_workbook(workbook_path, formatting_info=True)