def countit(fields, iter_of_iter, default=None): """ note: robust to fields not being in i_of_i, using ``default`` """ C = Counter() # needs hashables T = namedtuple("Thing", fields) get = default_iget(*fields, default=default) return Counter((T(*get(thing)) for thing in iter_of_iter))
def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, idx_type=None): """Create an index/multindex with given dimensions, levels, names, etc' nentries - number of entries in index nlevels - number of levels (> 1 produces multindex) prefix - a string prefix for labels names - (Optional), bool or list of strings. if True will use default names, if false will use no names, if a list is given, the name of each level in the index will be taken from the list. ndupe_l - (Optional), list of ints, the number of rows for which the label will repeated at the corresponding level, you can specify just the first few, the rest will use the default ndupe_l of 1. len(ndupe_l) <= nlevels. idx_type - "i"/"f"/"s"/"u"/"dt/"p". If idx_type is not None, `idx_nlevels` must be 1. "i"/"f" creates an integer/float index, "s"/"u" creates a string/unicode index "dt" create a datetime index. if unspecified, string labels will be generated. """ if ndupe_l is None: ndupe_l = [1] * nlevels assert (_is_sequence(ndupe_l) and len(ndupe_l) <= nlevels) assert (names is None or names is False or names is True or len(names) is nlevels) assert idx_type is None or \ (idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1) if names is True: # build default names names = [prefix + str(i) for i in range(nlevels)] if names is False: # pass None to index constructor for no name names = None # make singelton case uniform if isinstance(names, compat.string_types) and nlevels == 1: names = [names] # specific 1D index type requested? idx_func = dict(i=makeIntIndex, f=makeFloatIndex, s=makeStringIndex, u=makeUnicodeIndex, dt=makeDateIndex, p=makePeriodIndex).get(idx_type) if idx_func: idx = idx_func(nentries) # but we need to fill in the name if names: idx.name = names[0] return idx elif idx_type is not None: raise ValueError('"%s" is not a legal value for `idx_type`, use ' '"i"/"f"/"s"/"u"/"dt/"p".' % idx_type) if len(ndupe_l) < nlevels: ndupe_l.extend([1] * (nlevels - len(ndupe_l))) assert len(ndupe_l) == nlevels assert all([x > 0 for x in ndupe_l]) tuples = [] for i in range(nlevels): def keyfunc(x): import re numeric_tuple = re.sub("[^\d_]_?", "", x).split("_") return lmap(int, numeric_tuple) # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 cnt = Counter() for j in range(div_factor): label = prefix + '_l%d_g' % i + str(j) cnt[label] = ndupe_l[i] # cute Counter trick result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] tuples.append(result) tuples = lzip(*tuples) # convert tuples to index if nentries == 1: index = Index(tuples[0], name=names[0]) else: index = MultiIndex.from_tuples(tuples, names=names) return index