def iteraddrownumbers(table, start, step):
    """Yield the rows of `table` with a leading row-number column.

    The first item produced by iterating `table` is treated as the header
    and is emitted with the field name ``'row'`` prepended.  Every
    following row is emitted with a running number prepended; the number
    is `start` for the first data row and grows by `step` for each
    subsequent row.

    :param table: iterable whose first item is the header row and whose
        remaining items are data rows (each row itself an iterable)
    :param start: number assigned to the first data row
    :param step: increment applied between consecutive data rows
    :returns: generator of tuples (header first, then data rows)

    An empty `table` (no header at all) produces no output.
    """
    it = iter(table)
    try:
        # next() builtin rather than the Python-2-only it.next() method
        flds = next(it)
    except StopIteration:
        # No header: end cleanly.  Letting StopIteration escape a generator
        # body is an error under PEP 479.
        return
    yield ('row',) + tuple(flds)

    # A plain running counter is equivalent to zipping against
    # itertools.count(start, step) (which also adds `step` repeatedly) and
    # avoids depending on izip, which does not exist on Python 3.
    n = start
    for row in it:
        yield (n,) + tuple(row)
        n += step
def _recursive_bin(outerbin, level, bindef, fields, keys, widths, getval,
                   mins, maxs):
    """Recursively split rows into nested range bins, one key per level.

    At each level the rows of `outerbin` are sorted on ``keys[level]`` and
    partitioned into consecutive bins of width ``widths[level]``; each bin
    is then handed to the next level.  Once every key has been consumed
    (``level == len(keys)``) a single item is yielded for the bin.

    :param outerbin: iterable of data rows (no header) belonging to the
        enclosing bin
    :param level: index into `keys`, `widths`, `mins` and `maxs` for this
        recursion depth
    :param bindef: sequence of ``(binmin, binmax)`` pairs describing the
        enclosing bins; it is copied, never mutated
    :param fields: header row, prepended to `outerbin` so the rows can be
        passed to `sort` as a table
    :param keys: per-level key specifications, passed to `rowitemgetter`
        and `sort`
    :param widths: per-level bin widths
    :param getval: callable applied to each row at the bottom level
    :param mins: per-level lower limits; ``None`` means "use the key of the
        first row after sorting"
    :param maxs: per-level upper limits; ``None`` means "keep creating bins
        until the rows run out"
    :returns: generator of ``(bindef, vals)`` pairs where `bindef` is a
        tuple of ``(binmin, binmax)`` pairs (one per level) and `vals` is a
        lazy generator of ``getval(row)`` over the rows in that bin

    Bins are half-open ``[binmin, binmax)``, except that when a maximum is
    given the final bin is truncated to it and also includes rows whose key
    equals that maximum.

    NOTE(review): the algorithm assumes `sort` yields the header followed by
    rows in ascending key order - confirm against its definition.
    """
    bindef = list(bindef)  # take a copy so sibling bins do not share state

    if level == len(keys):
        # Bottom out: every key has been binned, emit the values lazily.
        vals = (getval(row) for row in outerbin)
        yield tuple(bindef), vals
        return

    # Go deeper: bin on the key for this level.
    key = keys[level]
    getkey = rowitemgetter(fields, key)
    width = widths[level]
    minv = mins[level]
    maxv = maxs[level]

    # Reconstitute a table (header + rows) so it can be sorted on this key.
    tbl = itertools.chain([fields], outerbin)
    tbl_sorted = sort(tbl, key)
    it = iter(tbl_sorted)
    try:
        next(it)  # throw away header
    except StopIteration:
        # Nothing at all came back; end the generator explicitly rather
        # than letting StopIteration escape (an error under PEP 479).
        return

    if minv is not None and maxv is not None:
        # Both limits given: a fixed number of bins, each of which is
        # emitted even when it contains no rows.
        # NOTE(review): with integer operands and Python 2 floor division
        # this would under-count the bins - confirm the module enables
        # true division.
        numbins = int(math.ceil((maxv - minv) / width))
        keyv = None  # None means "no row has been read yet"
        for n in range(numbins):
            binminv = minv + n*width
            binmaxv = binminv + width
            if binmaxv >= maxv:  # final bin
                binmaxv = maxv  # truncate final bin to specified maximum
            thisbindef = list(bindef)
            thisbindef.append((binminv, binmaxv))
            binnedrows = []
            try:
                # Advance until we're within the bin's range.  The explicit
                # None test replaces a None < number comparison, which only
                # Python 2 permits.
                while keyv is None or keyv < binminv:
                    row = next(it)
                    keyv = getkey(row)
                while binminv <= keyv < binmaxv:  # within the bin
                    binnedrows.append(row)
                    row = next(it)
                    keyv = getkey(row)
                # Last bin is closed at maxv: also take rows sitting exactly
                # on the maximum.
                # possible floating point precision bug here?
                while keyv == binmaxv == maxv:
                    binnedrows.append(row)
                    row = next(it)
                    keyv = getkey(row)
            except StopIteration:
                # Rows exhausted; remaining bins are still emitted, empty.
                pass
            for r in _recursive_bin(binnedrows, level+1, thisbindef, fields,
                                    keys, widths, getval, mins, maxs):
                yield r
        return

    # At least one limit is missing and has to be determined from the data.
    try:
        row = next(it)
    except StopIteration:
        # No rows, so there is no data to derive a limit from and nothing
        # to bin at this level.
        return
    keyv = getkey(row)
    if minv is None:
        minv = keyv  # the first row after sorting supplies the lower limit

    # N.B., we need to account for two possible scenarios
    # (1) maxv is not specified, so keep making bins until we run out of rows
    # (2) maxv is specified, so iterate over bins up to maxv
    try:
        for binminv in count(minv, width):
            binmaxv = binminv + width
            if maxv is not None and binmaxv >= maxv:  # final bin
                binmaxv = maxv  # truncate final bin to specified maximum
            thisbindef = list(bindef)
            thisbindef.append((binminv, binmaxv))
            binnedrows = []
            while keyv < binminv:  # advance until we're within the bin
                row = next(it)
                keyv = getkey(row)
            while binminv <= keyv < binmaxv:  # within the bin
                binnedrows.append(row)
                row = next(it)
                keyv = getkey(row)
            # Last bin is closed at maxv when a maximum is specified.
            # possible floating point precision bug here?
            while maxv is not None and keyv == binmaxv == maxv:
                binnedrows.append(row)
                row = next(it)
                keyv = getkey(row)

            for r in _recursive_bin(binnedrows, level+1, thisbindef, fields,
                                    keys, widths, getval, mins, maxs):
                yield r

            # possible floating point precision bug here?
            if maxv is not None and binmaxv == maxv:
                break
    except StopIteration:
        # Rows ran out part-way through a bin: don't forget to emit the bin
        # that was being filled when the iterator was exhausted.
        for r in _recursive_bin(binnedrows, level+1, thisbindef, fields,
                                keys, widths, getval, mins, maxs):
            yield r