Example #1
0
 def get_workbooks(self):
     """
     Yield ``(book, filename)`` for every file stored in ``self.temp_path``.

     Each directory entry is split once on ``'-'`` into a prefix and the
     filename that is reported to the caller.  Entries are yielded in
     ascending prefix order.  Prefixes that parse as integers are compared
     numerically, so ``10-x`` follows ``9-x`` instead of landing between
     ``1-x`` and ``2-x`` as a plain string sort would place it.
     NOTE(review): this assumes the prefix is a running index written by
     whatever populated the directory -- confirm against the writer.

     Nothing is yielded when ``self.temp_path`` is ``None``.

     Raises ``ValueError`` when a directory entry contains no ``'-'``.
     """
     if self.temp_path is None:
         return

     def prefix_order(entry):
         # Integer prefixes first, in numeric order; anything else keeps
         # the original lexicographic ordering after them.
         try:
             return (0, int(entry[0]), entry)
         except ValueError:
             return (1, 0, entry)

     entries = []
     for name in os.listdir(self.temp_path):
         entry = name.split('-', 1)
         entry.append(name)
         entries.append(entry)
     entries.sort(key=prefix_order)

     for _prefix, filename, pathname in entries:
         yield (
             # We currently don't open with on_demand=True here
             # as error filters should be lastish in the chain
             # so there's not much win.
             # However, if we did, getting rid of the temp dirs
             # becomes a problem as, on Windows, they can't be
             # deleted until the xlrd.Book object is done with
             # and we don't know when that might be :-(
             xlrd.open_workbook(
                 os.path.join(self.temp_path, pathname),
                 pickleable=0,
                 formatting_info=1,
                 on_demand=False,
                 ragged_rows=True
                 ),
             filename
             )
Example #2
0
    def parse(cls, template_path):
        """Build a template object from the workbook stored at *template_path*.

        Every sheet is scanned row by row.  Cells whose text form is empty
        are dropped; the remaining ``(column, value)`` pairs of a row are
        handed to ``parse_column``, whose macro hits are appended to the
        sheet's meta information and whose group hits are registered
        (group starts) or processed (everything else).

        Returns the newly created ``cls`` instance.
        """
        template = cls()
        book = xlrd.open_workbook(template_path, formatting_info=True)

        for sheet_no, sheet in enumerate(book.sheets()):
            # Group bookkeeping starts afresh for each sheet.
            template.tmp_groups = {}
            macro_for_sheet = Macro(sheet.name)
            sheet_meta = SheetMetaInfo()
            template.meta[sheet_no] = sheet_meta
            sheet_meta.sheet_macro = macro_for_sheet

            for row_no in xrange(sheet.nrows):
                # Keep only the cells that actually carry some text.
                filled_cells = []
                for col_no in xrange(sheet.ncols):
                    value = sheet.cell_value(row_no, col_no)
                    if unicode(value):
                        filled_cells.append((col_no, value))
                if not filled_cells:
                    continue

                macro_hits, group_hits = template.parse_column(filled_cells)
                sheet_meta.macros += [
                    MacroDef(row_no, hit_col, macro, *extras)
                    for hit_col, macro, extras in macro_hits
                ]

                # Group starts found on this row are parsed together and
                # registered against the position of the first one.
                openers = []
                for pos, macro, extras in group_hits:
                    if macro._is_group_start:
                        openers.append((pos, macro, extras))
                if openers:
                    parsed_groups = GroupMacroDef.parse(openers)
                    first_pos = openers[0][0]
                    for group in parsed_groups:
                        template.register_group(row_no, first_pos, group)

                # Every remaining group hit is processed individually.
                for pos, macro, extras in group_hits:
                    if not macro._is_group_start:
                        template.process_group(sheet_no, row_no, pos, macro)

        # Drop the local reference to the workbook before returning.
        book = None
        return template
Example #3
0
 def get_workbooks(self):
     """
     Yield one ``(book, filename)`` tuple per path from ``get_filepaths()``.

     ``book`` is the ``xlrd.Book`` opened from the path and ``filename``
     is the final component of that path.

     Override this method when the data to process does not live in
     files, or when ``xlrd.open_workbook`` needs different parameters.
     Any replacement must likewise return an iterable of
     ``(xlrd.Book, filename)`` tuples.
     """
     for workbook_path in self.get_filepaths():
         book = xlrd.open_workbook(
             workbook_path,
             pickleable=0,
             formatting_info=1,
             on_demand=True,
             ragged_rows=True)
         _directory, source_name = os.path.split(workbook_path)
         yield (book, source_name)
Example #4
0
File: cne.py Project: shaung/xlpy
def create_copy(fpath):
    """Open the workbook at *fpath* with formatting info and wrap a copy of it.

    Returns a ``CneBook`` built from the copied workbook and *fpath*.
    """
    source_book = open_workbook(fpath, formatting_info=True)
    writable_copy = copy_book(source_book)
    return CneBook(writable_copy, fpath)
Example #5
0
File: cne.py Project: shaung/xlpy
 def __init__(self, book, old_path):
     """Keep *book* and load the styles of the workbook at *old_path*.

     The file at *old_path* is opened with formatting information and
     stored as ``oldbook``; ``style_list`` is derived from it with
     ``get_xlwt_style_list``.
     """
     reference_book = open_workbook(old_path, formatting_info=True)
     self.oldbook = reference_book
     self.workbook = book
     self.style_list = get_xlwt_style_list(reference_book)
Example #6
0
def check_file(fname, verbose, do_punc=False, fmt_info=0, encoding='ascii', onesheet=''):
    """Report how many cells of each sheet in workbook *fname* are wasted.

    For every selected sheet the declared size (``nrows`` x ``ncols``) is
    compared with the "good" size returned by ``number_of_good_rows`` and
    ``number_of_good_cols``.  A line is printed for a sheet when *verbose*
    is true or when the two sizes differ, and a grand total is printed at
    the end.

    fname    -- path handed to ``open_workbook``.
    verbose  -- true: always print the per-sheet line;
                >= 2: also count non-null cells and print the density;
                >= 3: also list the non-empty cells of the right-most
                column.
    do_punc  -- when true, ``ispunc`` is passed as the checker to the
                good-row/good-column helpers; otherwise ``None`` is passed.
    fmt_info -- forwarded to ``open_workbook`` as ``formatting_info``.
    encoding -- passed to ``safe_encode`` when printing the sheet name.
    onesheet -- ``None`` or ``''`` to check every sheet; otherwise either a
                sheet index (anything ``int()`` accepts) or a sheet name.
    """
    # Single-argument, parenthesised prints behave the same as the print
    # statement on Python 2 and also parse on Python 3.
    print("")
    print(fname)
    checker = ispunc if do_punc else None
    # Retry with progressively fewer keyword arguments when open_workbook
    # rejects them with a TypeError.
    try:
        book = open_workbook(fname, formatting_info=fmt_info, on_demand=True)
    except TypeError:
        try:
            book = open_workbook(fname, formatting_info=fmt_info)
        except TypeError:
            # this is becoming ridiculous
            book = open_workbook(fname)
    totold = totnew = totnotnull = 0
    if onesheet is None or onesheet == "":
        shxrange = range(book.nsheets)
    else:
        try:
            shxrange = [int(onesheet)]
        except ValueError:
            # Not a number: treat it as a sheet name.
            shxrange = [book.sheet_names().index(onesheet)]
    for shx in shxrange:
        sheet = book.sheet_by_index(shx)
        ngoodrows = number_of_good_rows(sheet, checker)
        ngoodcols = number_of_good_cols(sheet, checker, nrows=ngoodrows)
        oldncells = sheet.nrows * sheet.ncols
        newncells = ngoodrows * ngoodcols
        totold += oldncells
        totnew += newncells
        nnotnull = 0
        sheet_density_pct_s = ''
        if verbose >= 2:
            colxrange = range(ngoodcols)
            for rowx in range(ngoodrows):
                rowtypes = sheet.row_types(rowx)
                for colx in colxrange:
                    if rowtypes[colx] not in null_cell_types:
                        nnotnull += 1
            totnotnull += nnotnull
            # max(1, ...) avoids dividing by zero for an empty sheet.
            sheet_density_pct = (nnotnull * 100.0) / max(1, newncells)
            sheet_density_pct_s = "; den = %5.1f%%" % sheet_density_pct
        if verbose >= 3:
            # which rows have non_empty cells in the right-most column?
            lastcolx = sheet.ncols - 1
            for rowx in range(sheet.nrows):
                cell = sheet.cell(rowx, lastcolx)
                if cell.ctype != XL_CELL_EMPTY:
                    print("%s (%d, %d): type %d, value %r" % (
                        cellname(rowx, lastcolx), rowx, lastcolx, cell.ctype, cell.value))
        # The density clause used to reference the undefined name
        # ``ngoodcells``; the good-cell count is held in ``newncells``.
        if (verbose
            or ngoodrows != sheet.nrows
            or ngoodcols != sheet.ncols
            or (verbose >= 2 and newncells and sheet_density_pct < 90.0)
            ):
            if oldncells:
                pctwaste = (1.0 - float(newncells) / oldncells) * 100.0
            else:
                pctwaste = 0.0
            shname_enc = safe_encode(sheet.name, encoding)
            print("sheet #%2d: RxC %5d x %3d => %5d x %3d; %4.1f%% waste%s (%s)"
                % (shx, sheet.nrows, sheet.ncols,
                    ngoodrows, ngoodcols, pctwaste, sheet_density_pct_s, shname_enc))
        # Books without unload_sheet are simply left loaded.
        if hasattr(book, 'unload_sheet'):
            book.unload_sheet(shx)
    if totold:
        pctwaste = (1.0 - float(totnew) / totold) * 100.0
    else:
        pctwaste = 0.0
    print("%d cells => %d cells; %4.1f%% waste" % (totold, totnew, pctwaste))