Example #1
0
    def partition_nd(columns, filter_value, possible_values):
        """
        Return one array of indices per expected value (combination).

        columns and filter_value are handed unchanged to group_indices_nd,
        whose result is then queried once for each expected key.

        possible_values holds one sequence of expected values per column.
        With several columns the expected keys are the tuples of their
        cartesian product (in itertools.product order); with a single column
        they are simply the elements of possible_values[0].

        The result is a list with one entry per expected key, in that order.
        Keys for which the grouping has no entry get an empty integer array.
        """
        # The columns are only traversed once in this version, so making
        # contiguous copies of non contiguous columns would not pay off.
        indices_by_key = group_indices_nd(columns, filter_value)

        if len(columns) <= 1:
            expected_keys = possible_values[0]
        else:
            expected_keys = product(*possible_values)

        # the same empty array stands in for every key without a match
        no_match = np.empty(0, dtype=int)

        # XXX: keys of the grouping which are not among the expected keys are
        # silently ignored. Warning about them would be nice but is probably
        # hard to do efficiently in the python version (and diverging too
        # much from the other version is not desirable).
        lookup = indices_by_key.get
        return [lookup(key, no_match) for key in expected_keys]
Example #2
0
    def partition_nd(columns, filter_value, possible_values):
        """
        Return one array of indices per expected value (combination).

        columns must be a non-empty sequence of numpy arrays (checked with
        assertions). columns and filter_value are handed unchanged to
        group_indices_nd, whose result is then queried once for each
        expected key.

        possible_values holds one sequence of expected values per column.
        With several columns the expected keys are the tuples of their
        cartesian product (in itertools.product order); with a single column
        they are simply the elements of possible_values[0].

        The result is a list with one entry per expected key, in that order.
        Keys for which the grouping has no entry get an empty integer array.
        """
        assert len(columns) > 0
        assert all(isinstance(col, np.ndarray) for col in columns), (
            "not all columns are ndarrays: " +
            ', '.join(map(str, map(type, columns))))

        # The columns are only traversed once in this version, so making
        # contiguous copies of non contiguous columns would not pay off.
        indices_by_key = group_indices_nd(columns, filter_value)

        single_column = len(columns) <= 1
        expected_keys = (possible_values[0] if single_column
                         else product(*possible_values))

        # the same empty array stands in for every key without a match
        no_match = np.empty(0, dtype=int)

        # XXX: keys of the grouping which are not among the expected keys are
        # silently ignored. Warning about them would be nice but is probably
        # hard to do efficiently in the python version (and diverging too
        # much from the other version is not desirable).
        groups = []
        for key in expected_keys:
            groups.append(indices_by_key.get(key, no_match))
        return groups
Example #3
0
def group_context(used_variables, setfilter, context):
    """
    Group the rows of context by the values of the used variables.

    used_variables: iterable of names of the context columns to group by
        (they are processed in sorted order).
    setfilter: passed unchanged to group_indices_nd as its second argument.
    context: mapping giving access to each used variable and to 'id'.

    return a dict of the form:
    {'field1': array1, 'field2': array2,
     '__ids__': array_of_arrays_of_ids, '__len__': number_of_groups}
    where each fieldN array holds the value of that field for each group and
    '__ids__' holds, for each group (in the same order), the ids of its rows.
    """
    names = sorted(used_variables)
    columns = [context[name] for name in names]

    # group_indices_nd returns a dict {value_or_tuple: array_of_indices}
    d = group_indices_nd(columns, setfilter)

    # Materialize the keys: on Python 3, d.keys() is a view and np.array()
    # would wrap it in a 0-d object array instead of making a 1-d array.
    keys = list(d.keys())
    if len(columns) > 1:
        # keys are tuples with one value per column: transpose them column
        # by column (instead of using zip(*keys)) so that each field still
        # gets an (empty) array when there is no group at all
        keylists = [[key[i] for key in keys] for i in range(len(columns))]
    else:
        keylists = [keys]
    keyarrays = [np.array(c) for c in keylists]

    # we want a 1d array of arrays, not the 2d array that
    # np.array(list(d.values())) produces if we have a list of arrays with
    # all the same length
    idcol = context['id']
    ids_by_group = np.empty(len(d), dtype=object)
    ids_by_group[:] = [idcol[v] for v in d.values()]

    result = dict(zip(names, keyarrays))
    result['__ids__'] = ids_by_group
    result['__len__'] = len(d)
    return result