コード例 #1
0
ファイル: filter.py プロジェクト: yuscale/desicos
 def get_workbooks(self):
     """Yield ``(book, filename)`` pairs for the files in ``self.temp_path``.

     Nothing is yielded when ``self.temp_path`` is ``None``.  Every
     directory entry is split once on ``'-'`` into a prefix and the
     filename to report; the entries are then handled in sorted order
     of those parts.
     """
     if self.temp_path is None:
         return
     entries = sorted(
         stored.split('-', 1) + [stored]
         for stored in os.listdir(self.temp_path)
     )
     for _prefix, filename, stored in entries:
         # on_demand stays False: error filters should sit near the end
         # of the chain, so lazy loading would gain little.  It would
         # also make removing the temp dirs a problem, because on
         # Windows they can't be deleted until the xlrd.Book object is
         # done with, and we don't know when that might be.
         book = xlrd.open_workbook(
             os.path.join(self.temp_path, stored),
             pickleable=0,
             formatting_info=1,
             on_demand=False,
             ragged_rows=True
             )
         yield book, filename
コード例 #2
0
ファイル: filter.py プロジェクト: saullocastro/desicos
 def get_workbooks(self):
     """Yield ``(book, filename)`` pairs for the files in ``self.temp_path``.

     Nothing is yielded when ``self.temp_path`` is ``None``.  Every
     directory entry is split once on ``"-"`` into a prefix and the
     filename to report; the entries are then handled in sorted order
     of those parts.
     """
     if self.temp_path is None:
         return
     entries = sorted(
         stored.split("-", 1) + [stored]
         for stored in os.listdir(self.temp_path)
     )
     for _prefix, filename, stored in entries:
         # on_demand stays False: error filters should sit near the end
         # of the chain, so lazy loading would gain little.  It would
         # also make removing the temp dirs a problem, because on
         # Windows they can't be deleted until the xlrd.Book object is
         # done with, and we don't know when that might be.
         book = xlrd.open_workbook(
             os.path.join(self.temp_path, stored),
             pickleable=0,
             formatting_info=1,
             on_demand=False,
             ragged_rows=True,
         )
         yield book, filename
コード例 #3
0
ファイル: filter.py プロジェクト: saullocastro/desicos
 def get_workbooks(self):
     """
     Yield a ``(book, filename)`` tuple for every path produced by
     ``self.get_filepaths()``.

     Override this method when the data to be processed is not stored
     in files, or when special parameters need to be passed to
     xlrd.open_workbook.  Any implementation must return an iterable
     sequence of tuples: the first element must be an xlrd.Book object
     and the second the filename of the file the book came from.
     """
     for path in self.get_filepaths():
         book = xlrd.open_workbook(
             path,
             pickleable=0,
             formatting_info=1,
             on_demand=True,
             ragged_rows=True,
         )
         # Only the final path component is reported as the filename.
         yield book, os.path.basename(path)
コード例 #4
0
ファイル: filter.py プロジェクト: yuscale/desicos
 def get_workbooks(self):
     """
     Yield a ``(book, filename)`` tuple for every path produced by
     ``self.get_filepaths()``.

     Override this method when the data to be processed is not stored
     in files, or when special parameters need to be passed to
     xlrd.open_workbook.  Any implementation must return an iterable
     sequence of tuples: the first element must be an xlrd.Book object
     and the second the filename of the file the book came from.
     """
     for path in self.get_filepaths():
         book = xlrd.open_workbook(
             path,
             pickleable=0,
             formatting_info=1,
             on_demand=True,
             ragged_rows=True)
         # Only the final path component is reported as the filename.
         yield book, os.path.basename(path)
コード例 #5
0
def get_book_sheet(excel_name, sheet_name):
    """Return a writable workbook together with a newly added worksheet.

    If ``excel_name`` is an existing file, it is opened (keeping
    formatting information), copied into a writable workbook, and a new
    sheet named ``sheet_name + '_NN'`` is added, where ``NN`` is the
    lowest zero-padded counter (starting at ``00``) that is not already
    the name of a sheet in the file.  Otherwise a new workbook is
    created with a single sheet named ``sheet_name + '_00'``.

    The workbook is not saved by this function.

    Parameters
    ----------
    excel_name : str
        The full path for the desired Excel file.
    sheet_name : str
        The base name of the desired Excel Worksheet; a ``_NN`` suffix
        is always appended.

    Returns
    -------
    workbook, sheet : tuple
        A tuple with an ``xlwt.Workbook`` and an ``xlwt.Worksheet``
        object.

    """
    from desicos.xlrd import open_workbook
    from desicos.xlutils.copy import copy
    if os.path.isfile(excel_name):
        rb = open_workbook(excel_name, formatting_info=True)
        # A set gives constant-time lookups while probing for a free name.
        taken = set(s.name for s in rb.sheets())
        book = copy(rb)
        count = 0
        new_sheet_name = sheet_name + '_%02d' % count
        while new_sheet_name in taken:
            count += 1
            new_sheet_name = sheet_name + '_%02d' % count
        sheet = book.add_sheet(new_sheet_name)
    else:
        from desicos.xlwt import Workbook
        book = Workbook()
        sheet = book.add_sheet(sheet_name + '_00')

    return book, sheet
コード例 #6
0
ファイル: margins.py プロジェクト: yuscale/desicos
def check_file(fname,
               verbose,
               do_punc=False,
               fmt_info=0,
               encoding='ascii',
               onesheet=''):
    print
    print fname
    if do_punc:
        checker = ispunc
    else:
        checker = None
    try:
        book = open_workbook(fname, formatting_info=fmt_info, on_demand=True)
    except TypeError:
        try:
            book = open_workbook(fname, formatting_info=fmt_info)
        except TypeError:
            # this is becoming ridiculous
            book = open_workbook(fname)
    totold = totnew = totnotnull = 0
    if onesheet is None or onesheet == "":
        shxrange = range(book.nsheets)
    else:
        try:
            shxrange = [int(onesheet)]
        except ValueError:
            shxrange = [book.sheet_names().index(onesheet)]
    for shx in shxrange:
        sheet = book.sheet_by_index(shx)
        ngoodrows = number_of_good_rows(sheet, checker)
        ngoodcols = number_of_good_cols(sheet, checker, nrows=ngoodrows)
        oldncells = sheet.nrows * sheet.ncols
        newncells = ngoodrows * ngoodcols
        totold += oldncells
        totnew += newncells
        nnotnull = 0
        sheet_density_pct_s = ''
        if verbose >= 2:
            colxrange = range(ngoodcols)
            for rowx in xrange(ngoodrows):
                rowtypes = sheet.row_types(rowx)
                for colx in colxrange:
                    if rowtypes[colx] not in null_cell_types:
                        nnotnull += 1
            totnotnull += nnotnull
            sheet_density_pct = (nnotnull * 100.0) / max(1, newncells)
            sheet_density_pct_s = "; den = %5.1f%%" % sheet_density_pct
        if verbose >= 3:
            # which rows have non_empty cells in the right-most column?
            lastcolx = sheet.ncols - 1
            for rowx in xrange(sheet.nrows):
                cell = sheet.cell(rowx, lastcolx)
                if cell.ctype != XL_CELL_EMPTY:
                    print "%s (%d, %d): type %d, value %r" % (cellname(
                        rowx,
                        lastcolx), rowx, lastcolx, cell.ctype, cell.value)
        if (verbose or ngoodrows != sheet.nrows or ngoodcols != sheet.ncols
                or (verbose >= 2 and ngoodcells and sheet_density_pct < 90.0)):
            if oldncells:
                pctwaste = (1.0 - float(newncells) / oldncells) * 100.0
            else:
                pctwaste = 0.0
            shname_enc = safe_encode(sheet.name, encoding)
            print "sheet #%2d: RxC %5d x %3d => %5d x %3d; %4.1f%% waste%s (%s)" \
                % (shx, sheet.nrows, sheet.ncols,
                    ngoodrows, ngoodcols, pctwaste, sheet_density_pct_s, shname_enc)
        if hasattr(book, 'unload_sheet'):
            book.unload_sheet(shx)
    if totold:
        pctwaste = (1.0 - float(totnew) / totold) * 100.0
    else:
        pctwaste = 0.0
    print "%d cells => %d cells; %4.1f%% waste" % (totold, totnew, pctwaste)
コード例 #7
0
ファイル: margins.py プロジェクト: desicos/desicos
def check_file(fname, verbose, do_punc=False, fmt_info=0, encoding='ascii', onesheet=''):
    print
    print fname
    if do_punc:
        checker = ispunc
    else:
        checker = None
    try:
        book = open_workbook(fname, formatting_info=fmt_info, on_demand=True)
    except TypeError:
        try:
            book = open_workbook(fname, formatting_info=fmt_info)
        except TypeError:
            # this is becoming ridiculous
            book = open_workbook(fname)
    totold = totnew = totnotnull = 0
    if onesheet is None or onesheet == "":
        shxrange = range(book.nsheets)
    else:
        try:
            shxrange = [int(onesheet)]
        except ValueError:
            shxrange = [book.sheet_names().index(onesheet)]
    for shx in shxrange:
        sheet = book.sheet_by_index(shx)
        ngoodrows = number_of_good_rows(sheet, checker)
        ngoodcols = number_of_good_cols(sheet, checker, nrows=ngoodrows)
        oldncells = sheet.nrows * sheet.ncols
        newncells = ngoodrows * ngoodcols
        totold += oldncells
        totnew += newncells
        nnotnull = 0
        sheet_density_pct_s = ''
        if verbose >= 2:
            colxrange = range(ngoodcols)
            for rowx in xrange(ngoodrows):
                rowtypes = sheet.row_types(rowx)
                for colx in colxrange:
                    if rowtypes[colx] not in null_cell_types:
                        nnotnull += 1
            totnotnull += nnotnull
            sheet_density_pct = (nnotnull * 100.0) / max(1, newncells)
            sheet_density_pct_s = "; den = %5.1f%%" % sheet_density_pct
        if verbose >= 3:
            # which rows have non_empty cells in the right-most column?
            lastcolx = sheet.ncols - 1
            for rowx in xrange(sheet.nrows):
                cell = sheet.cell(rowx, lastcolx)
                if cell.ctype != XL_CELL_EMPTY:
                    print "%s (%d, %d): type %d, value %r" % (
                        cellname(rowx, lastcolx), rowx, lastcolx, cell.ctype, cell.value)
        if (verbose
            or ngoodrows != sheet.nrows
            or ngoodcols != sheet.ncols
            or (verbose >= 2 and ngoodcells and sheet_density_pct < 90.0)
            ):
            if oldncells:
                pctwaste = (1.0 - float(newncells) / oldncells) * 100.0
            else:
                pctwaste = 0.0
            shname_enc = safe_encode(sheet.name, encoding)
            print "sheet #%2d: RxC %5d x %3d => %5d x %3d; %4.1f%% waste%s (%s)" \
                % (shx, sheet.nrows, sheet.ncols,
                    ngoodrows, ngoodcols, pctwaste, sheet_density_pct_s, shname_enc)
        if hasattr(book, 'unload_sheet'):
            book.unload_sheet(shx)
    if totold:
        pctwaste = (1.0 - float(totnew) / totold) * 100.0
    else:
        pctwaste = 0.0
    print "%d cells => %d cells; %4.1f%% waste" % (totold, totnew, pctwaste)
コード例 #8
0
ファイル: xlrdnameAPIdemo.py プロジェクト: desicos/desicos
    [Initial direct access through book.name_map]
    Sales * 0 lists all occurrences of "Sales" in any scope
    [Direct access through book.name_and_scope_map]
    Revenue -1 0 checks if "Revenue" exists in global scope

"""
        sys.stdout.write(text)

    if len(sys.argv) != 5:
        usage()
        sys.exit(0)
    arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls"
    arg_name = sys.argv[2]    # see below
    arg_scope = sys.argv[3]   # see below
    arg_show_contents = int(sys.argv[4]) # 0: no show, 1: only non-empty cells,
                                         # 2: all cells
    for fname in glob.glob(arg_pattern):
        book = xlrd.open_workbook(fname)
        if arg_name == "*":
            # Examine book.name_obj_list to find all names
            # in a given scope ("*" => all scopes)
            do_scope_query(book, arg_scope, arg_show_contents)
        elif arg_scope == "*":
            # Using book.name_map to find all usage of a name.
            show_name_details(book, arg_name, arg_show_contents)
        else:
            # Using book.name_and_scope_map to find which if any instances
            # of a name are visible in the given scope, which can be supplied
            # as -1 (global) or a sheet number or a sheet name.
            show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
コード例 #9
0
    [Direct access through book.name_and_scope_map]
    Revenue -1 0 checks if "Revenue" exists in global scope

"""
        sys.stdout.write(text)

    if len(sys.argv) != 5:
        usage()
        sys.exit(0)
    arg_pattern = sys.argv[1]  # glob pattern e.g. "foo*.xls"
    arg_name = sys.argv[2]  # see below
    arg_scope = sys.argv[3]  # see below
    arg_show_contents = int(
        sys.argv[4])  # 0: no show, 1: only non-empty cells,
    # 2: all cells
    for fname in glob.glob(arg_pattern):
        book = xlrd.open_workbook(fname)
        if arg_name == "*":
            # Examine book.name_obj_list to find all names
            # in a given scope ("*" => all scopes)
            do_scope_query(book, arg_scope, arg_show_contents)
        elif arg_scope == "*":
            # Using book.name_map to find all usage of a name.
            show_name_details(book, arg_name, arg_show_contents)
        else:
            # Using book.name_and_scope_map to find which if any instances
            # of a name are visible in the given scope, which can be supplied
            # as -1 (global) or a sheet number or a sheet name.
            show_name_details_in_scope(book, arg_name, arg_scope,
                                       arg_show_contents)