def get_workbooks(self):
    """Yield ``(book, filename)`` pairs for the files in ``self.temp_path``.

    Every directory entry is split once on ``-`` into an index prefix and
    a filename.  Entries are yielded in ascending index order; prefixes
    that parse as integers are compared numerically, so ``10-x.xls``
    follows ``2-x.xls`` instead of preceding it as a plain string sort
    would.  Entries whose prefix is not an integer come after the numeric
    ones, in string order.

    Nothing is yielded when ``self.temp_path`` is ``None``.

    Raises:
        ValueError: when a directory entry's name contains no ``-`` and
            therefore cannot be unpacked into index, filename and path.
    """
    if self.temp_path is None:
        return

    def numeric_first(entry):
        # entry is [index, filename, pathname].  The leading 0/1 keeps
        # integer and string prefixes from ever being compared directly.
        try:
            return (0, int(entry[0]), entry[1:])
        except ValueError:
            return (1, 0, entry)

    entries = []
    for name in os.listdir(self.temp_path):
        parts = name.split('-', 1)
        parts.append(name)
        entries.append(parts)
    entries.sort(key=numeric_first)

    for _index, filename, pathname in entries:
        # We currently don't open with on_demand=True here
        # as error filters should be lastish in the chain
        # so there's not much win.
        # However, if we did, getting rid of the temp dirs
        # becomes a problem as, on Windows, they can't be
        # deleted until the xlrd.Book object is done with
        # and we don't know when that might be :-(
        book = xlrd.open_workbook(
            os.path.join(self.temp_path, pathname),
            pickleable=0,
            formatting_info=1,
            on_demand=False,
            ragged_rows=True,
        )
        yield book, filename
def get_workbooks(self):
    """Open each file in ``self.temp_path`` and yield ``(book, filename)``.

    Directory entries are split once on ``-`` into an index prefix and a
    filename.  The entries are processed in ascending index order, with
    integer prefixes compared as numbers (a plain string sort would put
    ``10-...`` ahead of ``2-...``).  Entries whose prefix is not an
    integer are processed last, in string order.

    The generator is empty when ``self.temp_path`` is ``None``.

    Raises:
        ValueError: when an entry's name has no ``-`` separator, because
            it cannot be unpacked into index, filename and path.
    """
    if self.temp_path is None:
        return

    def by_index(entry):
        # Rank numeric prefixes (0) ahead of non-numeric ones (1) so that
        # integers and strings are never compared with each other.
        prefix = entry[0]
        try:
            return (0, int(prefix), entry[1:])
        except ValueError:
            return (1, 0, entry)

    entries = [
        name.split("-", 1) + [name]
        for name in os.listdir(self.temp_path)
    ]

    for _index, filename, pathname in sorted(entries, key=by_index):
        # We currently don't open with on_demand=True here
        # as error filters should be lastish in the chain
        # so there's not much win.
        # However, if we did, getting rid of the temp dirs
        # becomes a problem as, on Windows, they can't be
        # deleted until the xlrd.Book object is done with
        # and we don't know when that might be :-(
        workbook = xlrd.open_workbook(
            os.path.join(self.temp_path, pathname),
            pickleable=0,
            formatting_info=1,
            on_demand=False,
            ragged_rows=True,
        )
        yield workbook, filename
def get_workbooks(self):
    """
    Yield one ``(book, filename)`` tuple per path returned by
    ``self.get_filepaths()``.

    Override this method when the data to be processed is not stored in
    files, or when ``xlrd.open_workbook`` needs special parameters.

    Any implementation must produce an iterable of 2-tuples: the first
    element is an ``xlrd.Book`` object and the second is the filename of
    the file the book came from.
    """
    for filepath in self.get_filepaths():
        book = xlrd.open_workbook(
            filepath,
            pickleable=0,
            formatting_info=1,
            on_demand=True,
            ragged_rows=True,
        )
        # Only the final path component is reported, not the directory.
        yield book, os.path.basename(filepath)
def get_workbooks(self):
    """
    Open every path from ``self.get_filepaths()`` and yield it together
    with its filename.

    This method must be overridden if the data to be processed does not
    live in files, or if ``xlrd.open_workbook`` has to be called with
    special parameters.

    Implementations must return an iterable sequence of tuples whose
    first element is an ``xlrd.Book`` object and whose second element is
    the filename of the file that book was read from.
    """
    open_options = dict(
        pickleable=0,
        formatting_info=1,
        on_demand=True,
        ragged_rows=True,
    )
    for path in self.get_filepaths():
        workbook = xlrd.open_workbook(path, **open_options)
        _directory, filename = os.path.split(path)
        yield workbook, filename
def get_book_sheet(excel_name, sheet_name):
    """Return a workbook together with a newly added worksheet.

    If ``excel_name`` is an existing file, it is opened with formatting
    information, copied, and a sheet named ``sheet_name`` followed by the
    first unused two-digit suffix (``_00``, ``_01``, ...) is added to the
    copy. Otherwise a new workbook is created containing a single sheet
    named ``sheet_name + '_00'``.

    Parameters
    ----------
    excel_name : str
        The full path for the desired Excel file.
    sheet_name : str
        The base name of the desired Excel Worksheet.

    Returns
    -------
    workbook, sheet : tuple
        A tuple with an ``xlwt.Workbook`` and an ``xlwt.Worksheet`` object.

    """
    from desicos.xlrd import open_workbook
    from desicos.xlutils.copy import copy

    if not os.path.isfile(excel_name):
        # No file yet: start from an empty workbook.
        from desicos.xlwt import Workbook
        book = Workbook()
        return book, book.add_sheet(sheet_name + '_00')

    source_book = open_workbook(excel_name, formatting_info=True)
    taken_names = [existing.name for existing in source_book.sheets()]
    book = copy(source_book)
    # Return value is discarded.
    # NOTE(review): this looks like leftover code -- confirm that
    # get_sheet(0) has no needed side effect before removing it.
    book.get_sheet(0)

    # Find the first numbered name not already used by a sheet in the file.
    suffix = 0
    candidate = sheet_name + '_%02d' % suffix
    while candidate in taken_names:
        suffix += 1
        candidate = sheet_name + '_%02d' % suffix
    return book, book.add_sheet(candidate)
def check_file(fname, verbose, do_punc=False, fmt_info=0, encoding='ascii', onesheet=''):
    """Print how much of each sheet in workbook ``fname`` is wasted space.

    For every selected sheet, the declared dimensions (``nrows`` x
    ``ncols``) are compared with the dimensions computed by
    ``number_of_good_rows`` and ``number_of_good_cols``, and a summary
    line is printed.  A total for the whole file is printed at the end.

    Parameters:
        fname: path of the workbook to open.
        verbose: verbosity level.  Any true value prints a line for every
            sheet; otherwise only sheets whose computed dimensions differ
            from the declared ones are reported.  ``>= 2`` additionally
            counts the cells whose type is not in ``null_cell_types`` and
            appends a density figure.  ``>= 3`` additionally lists the
            non-empty cells of the right-most declared column.
        do_punc: when true, ``ispunc`` is passed as the checker to the
            row/column helpers; otherwise ``None`` is passed.
        fmt_info: forwarded to ``open_workbook`` as ``formatting_info``.
        encoding: passed to ``safe_encode`` when printing the sheet name.
        onesheet: ``None`` or ``''`` to process every sheet; otherwise a
            sheet index (anything ``int()`` accepts) or a sheet name.

    Raises:
        ValueError: propagated from ``sheet_names().index`` when
            ``onesheet`` is neither an integer nor a known sheet name.
    """
    # Single-argument print(...) and range() give the same output on
    # Python 2 and also run on Python 3.
    print("")
    print(fname)
    if do_punc:
        checker = ispunc
    else:
        checker = None
    # Retry with fewer keyword arguments when open_workbook rejects them
    # (presumably older xlrd versions -- the TypeError is the only signal).
    try:
        book = open_workbook(fname, formatting_info=fmt_info, on_demand=True)
    except TypeError:
        try:
            book = open_workbook(fname, formatting_info=fmt_info)
        except TypeError:
            # this is becoming ridiculous
            book = open_workbook(fname)
    totold = totnew = 0
    if onesheet is None or onesheet == "":
        shxrange = range(book.nsheets)
    else:
        try:
            shxrange = [int(onesheet)]
        except ValueError:
            shxrange = [book.sheet_names().index(onesheet)]
    for shx in shxrange:
        sheet = book.sheet_by_index(shx)
        ngoodrows = number_of_good_rows(sheet, checker)
        ngoodcols = number_of_good_cols(sheet, checker, nrows=ngoodrows)
        oldncells = sheet.nrows * sheet.ncols
        newncells = ngoodrows * ngoodcols
        totold += oldncells
        totnew += newncells
        sheet_density_pct_s = ''
        if verbose >= 2:
            nnotnull = 0
            colxrange = range(ngoodcols)
            for rowx in range(ngoodrows):
                rowtypes = sheet.row_types(rowx)
                for colx in colxrange:
                    if rowtypes[colx] not in null_cell_types:
                        nnotnull += 1
            # max(1, ...) avoids dividing by zero for an empty sheet.
            sheet_density_pct = (nnotnull * 100.0) / max(1, newncells)
            sheet_density_pct_s = "; den = %5.1f%%" % sheet_density_pct
        if verbose >= 3:
            # which rows have non_empty cells in the right-most column?
            lastcolx = sheet.ncols - 1
            for rowx in range(sheet.nrows):
                cell = sheet.cell(rowx, lastcolx)
                if cell.ctype != XL_CELL_EMPTY:
                    print("%s (%d, %d): type %d, value %r" % (
                        cellname(rowx, lastcolx), rowx, lastcolx,
                        cell.ctype, cell.value))
        # A former extra clause "(verbose >= 2 and ngoodcells and ...)" was
        # dropped: it could only be reached when verbose was false, so it
        # never affected the result, and it named an undefined variable.
        if verbose or ngoodrows != sheet.nrows or ngoodcols != sheet.ncols:
            if oldncells:
                pctwaste = (1.0 - float(newncells) / oldncells) * 100.0
            else:
                pctwaste = 0.0
            shname_enc = safe_encode(sheet.name, encoding)
            print("sheet #%2d: RxC %5d x %3d => %5d x %3d; %4.1f%% waste%s (%s)"
                  % (shx, sheet.nrows, sheet.ncols, ngoodrows, ngoodcols,
                     pctwaste, sheet_density_pct_s, shname_enc))
        # Only books that offer unload_sheet are asked to release the sheet.
        if hasattr(book, 'unload_sheet'):
            book.unload_sheet(shx)
    if totold:
        pctwaste = (1.0 - float(totnew) / totold) * 100.0
    else:
        pctwaste = 0.0
    print("%d cells => %d cells; %4.1f%% waste" % (totold, totnew, pctwaste))
def _waste_percentage(oldncells, newncells):
    """Return the percentage of ``oldncells`` not covered by ``newncells``."""
    if oldncells:
        return (1.0 - float(newncells) / oldncells) * 100.0
    return 0.0


def check_file(fname, verbose, do_punc=False, fmt_info=0, encoding='ascii', onesheet=''):
    """Report the wasted cell space of the sheets in workbook ``fname``.

    Each selected sheet's declared size (``nrows`` x ``ncols``) is
    compared with the size computed by ``number_of_good_rows`` and
    ``number_of_good_cols``; a summary line is printed per reported sheet
    and a file-wide total is printed last.

    Parameters:
        fname: path of the workbook to open.
        verbose: verbosity level.  Any true value reports every sheet;
            otherwise only sheets whose computed size differs from the
            declared size are reported.  ``>= 2`` also counts cells whose
            type is not in ``null_cell_types`` and appends a density
            figure.  ``>= 3`` also prints the non-empty cells of the
            right-most declared column.
        do_punc: when true, ``ispunc`` is used as the checker passed to
            the row/column helpers; otherwise ``None`` is passed.
        fmt_info: forwarded to ``open_workbook`` as ``formatting_info``.
        encoding: passed to ``safe_encode`` when printing the sheet name.
        onesheet: ``None`` or ``''`` for all sheets; otherwise a sheet
            index (anything ``int()`` accepts) or a sheet name.

    Raises:
        ValueError: propagated from ``sheet_names().index`` when
            ``onesheet`` is neither an integer nor a known sheet name.
    """
    # print(...) with one argument and range() behave identically on
    # Python 2 and are also valid on Python 3.
    print("")
    print(fname)
    if do_punc:
        checker = ispunc
    else:
        checker = None
    # Fall back to fewer keyword arguments when open_workbook raises
    # TypeError (presumably an older xlrd that lacks them).
    try:
        book = open_workbook(fname, formatting_info=fmt_info, on_demand=True)
    except TypeError:
        try:
            book = open_workbook(fname, formatting_info=fmt_info)
        except TypeError:
            # this is becoming ridiculous
            book = open_workbook(fname)
    if onesheet is None or onesheet == "":
        shxrange = range(book.nsheets)
    else:
        try:
            shxrange = [int(onesheet)]
        except ValueError:
            shxrange = [book.sheet_names().index(onesheet)]
    totold = 0
    totnew = 0
    for shx in shxrange:
        sheet = book.sheet_by_index(shx)
        ngoodrows = number_of_good_rows(sheet, checker)
        ngoodcols = number_of_good_cols(sheet, checker, nrows=ngoodrows)
        oldncells = sheet.nrows * sheet.ncols
        newncells = ngoodrows * ngoodcols
        totold += oldncells
        totnew += newncells
        sheet_density_pct_s = ''
        if verbose >= 2:
            nnotnull = 0
            colxrange = range(ngoodcols)
            for rowx in range(ngoodrows):
                rowtypes = sheet.row_types(rowx)
                for colx in colxrange:
                    if rowtypes[colx] not in null_cell_types:
                        nnotnull += 1
            # max(1, ...) keeps the division safe when no cells remain.
            sheet_density_pct = (nnotnull * 100.0) / max(1, newncells)
            sheet_density_pct_s = "; den = %5.1f%%" % sheet_density_pct
        if verbose >= 3:
            # which rows have non_empty cells in the right-most column?
            lastcolx = sheet.ncols - 1
            for rowx in range(sheet.nrows):
                cell = sheet.cell(rowx, lastcolx)
                if cell.ctype != XL_CELL_EMPTY:
                    print("%s (%d, %d): type %d, value %r" % (
                        cellname(rowx, lastcolx), rowx, lastcolx,
                        cell.ctype, cell.value))
        # The condition used to end with a clause testing "verbose >= 2"
        # and an undefined name; that clause was only reachable when
        # verbose was false, so it could never change the outcome.
        dimensions_differ = (ngoodrows != sheet.nrows
                             or ngoodcols != sheet.ncols)
        if verbose or dimensions_differ:
            pctwaste = _waste_percentage(oldncells, newncells)
            shname_enc = safe_encode(sheet.name, encoding)
            print("sheet #%2d: RxC %5d x %3d => %5d x %3d; %4.1f%% waste%s (%s)"
                  % (shx, sheet.nrows, sheet.ncols, ngoodrows, ngoodcols,
                     pctwaste, sheet_density_pct_s, shname_enc))
        # Release the sheet only when the book provides unload_sheet.
        if hasattr(book, 'unload_sheet'):
            book.unload_sheet(shx)
    print("%d cells => %d cells; %4.1f%% waste"
          % (totold, totnew, _waste_percentage(totold, totnew)))
[Initial direct access through book.name_map] Sales * 0 lists all occurrences of "Sales" in any scope [Direct access through book.name_and_scope_map] Revenue -1 0 checks if "Revenue" exists in global scope """ sys.stdout.write(text) if len(sys.argv) != 5: usage() sys.exit(0) arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls" arg_name = sys.argv[2] # see below arg_scope = sys.argv[3] # see below arg_show_contents = int(sys.argv[4]) # 0: no show, 1: only non-empty cells, # 2: all cells for fname in glob.glob(arg_pattern): book = xlrd.open_workbook(fname) if arg_name == "*": # Examine book.name_obj_list to find all names # in a given scope ("*" => all scopes) do_scope_query(book, arg_scope, arg_show_contents) elif arg_scope == "*": # Using book.name_map to find all usage of a name. show_name_details(book, arg_name, arg_show_contents) else: # Using book.name_and_scope_map to find which if any instances # of a name are visible in the given scope, which can be supplied # as -1 (global) or a sheet number or a sheet name. show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
[Direct access through book.name_and_scope_map] Revenue -1 0 checks if "Revenue" exists in global scope """ sys.stdout.write(text) if len(sys.argv) != 5: usage() sys.exit(0) arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls" arg_name = sys.argv[2] # see below arg_scope = sys.argv[3] # see below arg_show_contents = int( sys.argv[4]) # 0: no show, 1: only non-empty cells, # 2: all cells for fname in glob.glob(arg_pattern): book = xlrd.open_workbook(fname) if arg_name == "*": # Examine book.name_obj_list to find all names # in a given scope ("*" => all scopes) do_scope_query(book, arg_scope, arg_show_contents) elif arg_scope == "*": # Using book.name_map to find all usage of a name. show_name_details(book, arg_name, arg_show_contents) else: # Using book.name_and_scope_map to find which if any instances # of a name are visible in the given scope, which can be supplied # as -1 (global) or a sheet number or a sheet name. show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)