Exemple #1
0
def groupby(data,
            keys=None,
            size=None,
            min_size=None,
            max_size=None,
            contiguous=False):
    """
    Group `data` and return a list of (keys, values) pairs.

    Each pair holds the key values that identify a group, followed by the
    list of all items of `data` that have those key values.

    :param data: the items to group; a `Cube` is handed to its own
        `groupby(keys)` method
    :param keys: the key name(s) to group by, normalised with `listwrap`
    :param size: when given, used as `max_size`
    :param min_size: forwarded to `groupby_min_max_size`
    :param max_size: forwarded to `groupby_min_max_size`
    :param contiguous: maintain the order of the data, starting a new group
        whenever the key values change
    :return: when any of `size`, `min_size`, `max_size` is given, whatever
        `groupby_min_max_size` returns (`keys` and `contiguous` are ignored);
        for a `Cube`, whatever `data.groupby(keys)` returns; for
        `contiguous`, `wrap()` of the list of (keys, values) pairs
    """

    # NOTE(review): `!= None` is kept on purpose instead of `is not None`;
    # an identity test would behave differently for operands that override
    # comparison with None -- confirm before changing.
    if size != None or min_size != None or max_size != None:
        if size != None:
            # `size` takes precedence over any `max_size` that was passed
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    keys = listwrap(keys)

    def get_keys(d):
        # Copy just the grouping keys of `d` into a fresh Dict
        output = Dict()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        try:
            if not data:
                return wrap([])

            agg = DictList()  # finished groups, in order of appearance
            acc = DictList()  # items of the group currently being built
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    # The key changed: close the running group, start a new one
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    # NOTE(review): groups after the first are plain lists,
                    # while the first one is a DictList -- confirm intended.
                    acc = [d]
                else:
                    acc.append(d)
            # Close the last group (never empty here: data has >= 1 item)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception as e:
            Log.error("Problem grouping contiguous values", e)

    # NOTE(review): nothing follows the contiguous branch in this excerpt, so
    # as shown a call with `contiguous` false reaches the end of the function
    # and returns None.
Exemple #2
0
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    Group `data` and return (keys, values) pairs.

    Each pair holds the key values that identify a group, followed by the
    list of all items of `data` that have those key values.

    :param data: the items to group; a `Cube` is handed to its own
        `groupby(keys)` method
    :param keys: the key name(s) to group by; anything that is not a tuple
        or list is wrapped in a one-element tuple
    :param size: when given, used as `max_size`
    :param min_size: forwarded to `groupby_min_max_size`
    :param max_size: forwarded to `groupby_min_max_size`
    :param contiguous: maintain the order of the data, starting a new group
        whenever the key values change
    :return: when any of `size`, `min_size`, `max_size` is given, whatever
        `groupby_min_max_size` returns (`keys` and `contiguous` are ignored);
        for a `Cube`, whatever `data.groupby(keys)` returns; for
        `contiguous`, `wrap()` of the list of pairs; otherwise the values of
        a plain dict of pairs, one per distinct key
    """

    # NOTE(review): `!= None` is kept on purpose instead of `is not None`;
    # an identity test would behave differently for operands that override
    # comparison with None -- confirm before changing.
    if size != None or min_size != None or max_size != None:
        if size != None:
            # `size` takes precedence over any `max_size` that was passed
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    if not isinstance(keys, (tuple, list)):
        keys = (keys,)

    def get_keys(d):
        # Copy just the grouping keys of `d` into a fresh Dict
        output = Dict()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        try:
            if not data:
                return wrap([])

            agg = DictList()  # finished groups, in order of appearance
            acc = DictList()  # items of the group currently being built
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    # The key changed: close the running group, start a new one
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    # NOTE(review): groups after the first are plain lists,
                    # while the first one is a DictList -- confirm intended.
                    acc = [d]
                else:
                    acc.append(d)
            # Close the last group (never empty here: data has >= 1 item)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception as e:
            Log.error("Problem grouping contiguous values", e)

    try:
        # Bucket every item under its key; the first item seen for a key
        # supplies the key values reported for that group
        agg = {}
        for d in data:
            key = value2key(keys, d)
            pair = agg.get(key)
            if pair is None:
                pair = (get_keys(d), DictList())
                agg[key] = pair
            pair[1].append(d)

        return agg.values()
    except Exception as e:
        Log.error("Problem grouping", e)


def groupby_size(data, size):
    if hasattr(data, "next"):
        iterator = data
    elif hasattr(data, "__iter__"):
Exemple #4
0
                key = value2key(keys, d)
                if key != curr_key:
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    acc = [d]
                else:
                    acc.append(d)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception, e:
            Log.error("Problem grouping contiguous values", e)

    try:
        agg = {}
        for d in data:
            key = value2key(keys, d)
            pair = agg.get(key)
            if pair is None:
                pair = (get_keys(d), DictList())
                agg[key] = pair
            pair[1].append(d)

        return agg.values()
    except Exception, e:
        Log.error("Problem grouping", e)


def groupby_size(data, size):
    if hasattr(data, "next"):
        iterator = data
    elif hasattr(data, "__iter__"):