def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    Return a list of (keys, values) pairs, one pair per group.

    data       - the records to group; indexed by each name in `keys`
    keys       - a single key name or several; normalised with listwrap()
    size       - shorthand for max_size; takes precedence over max_size
    min_size   - passed through to groupby_min_max_size()
    max_size   - passed through to groupby_min_max_size()
    contiguous - keep the order of the data, starting a new group each
                 time the value of the keys changes

    When any of size/min_size/max_size is given, the work is delegated to
    groupby_min_max_size() and `keys`/`contiguous` are not consulted.
    When `data` is a Cube, the work is delegated to data.groupby(keys).

    In the contiguous case each pair is (key values read from the first
    member of the group, members of the group).  Any exception raised while
    grouping is handed to Log.error().
    """
    # NOTE(review): `!= None` is kept exactly as written; equality (rather
    # than identity) may matter for None-like values - confirm before changing
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    keys = listwrap(keys)

    def get_keys(d):
        # extract only the grouping columns from one record
        output = Dict()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        try:
            if not data:
                return wrap([])

            agg = DictList()   # finished (keys, values) pairs
            acc = DictList()   # members of the group being built
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    # selector changed: close the current group, start a new one
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    # use the same container type as the first group, so
                    # every group in the result is a DictList
                    acc = DictList()
                    acc.append(d)
                else:
                    acc.append(d)
            # flush the last group (data is known to be non-empty here)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception as e:
            Log.error("Problem grouping contiguous values", e)
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    Return a list of (keys, values) pairs, one pair per group.

    data       - the records to group; indexed by each name in `keys`
    keys       - a single key name, or a tuple/list of key names; anything
                 that is not a tuple or list is treated as one key
    size       - shorthand for max_size; takes precedence over max_size
    min_size   - passed through to groupby_min_max_size()
    max_size   - passed through to groupby_min_max_size()
    contiguous - keep the order of the data, starting a new group each
                 time the value of the keys changes

    When any of size/min_size/max_size is given, the work is delegated to
    groupby_min_max_size() and `keys`/`contiguous` are not consulted.
    When `data` is a Cube, the work is delegated to data.groupby(keys).

    In the contiguous case each pair is (key values read from the first
    member of the group, members of the group).  Any exception raised while
    grouping is handed to Log.error().
    """
    # NOTE(review): `!= None` is kept exactly as written; equality (rather
    # than identity) may matter for None-like values - confirm before changing
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    if not isinstance(keys, (tuple, list)):
        # a lone key becomes a one-element tuple so the loops below are uniform
        keys = (keys,)

    def get_keys(d):
        # extract only the grouping columns from one record
        output = Dict()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        try:
            if not data:
                return wrap([])

            agg = DictList()   # finished (keys, values) pairs
            acc = DictList()   # members of the group being built
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    # selector changed: close the current group, start a new one
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    # use the same container type as the first group, so
                    # every group in the result is a DictList
                    acc = DictList()
                    acc.append(d)
                else:
                    acc.append(d)
            # flush the last group (data is known to be non-empty here)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception as e:
            Log.error("Problem grouping contiguous values", e)
key = value2key(keys, d) if key != curr_key: agg.append((get_keys(acc[0]), acc)) curr_key = key acc = [d] else: acc.append(d) agg.append((get_keys(acc[0]), acc)) return wrap(agg) except Exception, e: Log.error("Problem grouping contiguous values", e) try: agg = {} for d in data: key = value2key(keys, d) pair = agg.get(key) if pair is None: pair = (get_keys(d), DictList()) agg[key] = pair pair[1].append(d) return agg.values() except Exception, e: Log.error("Problem grouping", e) def groupby_size(data, size): if hasattr(data, "next"): iterator = data elif hasattr(data, "__iter__"):