from itertools import product

import numpy as np


def partition_nd(columns, filter_value, possible_values):
    assert len(columns) > 0
    assert all(isinstance(c, np.ndarray) for c in columns), \
        "not all columns are ndarrays: " + \
        ', '.join(str(type(c)) for c in columns)
    # note that since we don't iterate through the columns many times,
    # it's not worth it to copy non contiguous columns in this version
    d = group_indices_nd(columns, filter_value)

    if len(columns) > 1:
        pvalues = product(*possible_values)
    else:
        pvalues = possible_values[0]

    empty_list = np.empty(0, dtype=int)
    # XXX: It would be nice to print a warning if d contains keys not in
    # pvalues but that might be hard to implement efficiently in the python
    # version (and I am not eager to diverge too much).
    return [d.get(pv, empty_list) for pv in pvalues]
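# Illustrative usage sketch. This example is an assumption, not part of the
# original module: it relies on group_indices_nd (defined elsewhere in this
# module) mapping each combination of column values, restricted to rows where
# the filter is true, to the array of row indices holding that combination.
#
#     age = np.array([0, 1, 0, 2, 1])
#     sex = np.array([0, 0, 1, 1, 0])
#     groups = partition_nd([age, sex], np.ones(5, dtype=bool),
#                           [[0, 1, 2], [0, 1]])
#     # groups follows the product([0, 1, 2], [0, 1]) order: groups[0] holds
#     # the indices where age == 0 and sex == 0, groups[5] those where
#     # age == 2 and sex == 1, and combinations absent from the data map to
#     # the shared empty index array.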
def group_context(used_variables, setfilter, context):
    """
    return a dict of the form:
    {'field1': array1, 'field2': array2,
     '__ids__': array_of_arrays_of_ids, '__len__': number_of_groups}
    """
    names = sorted(used_variables)
    columns = [context[name] for name in names]

    # group_indices_nd returns a dict {value_or_tuple: array_of_indices}
    d = group_indices_nd(columns, setfilter)

    # list() so that np.array below gets a real sequence (a dict view would
    # produce a 0-d object array under Python 3)
    keylists = zip(*d.keys()) if len(columns) > 1 else [list(d.keys())]
    keyarrays = [np.array(c) for c in keylists]

    # we want a 1d array of arrays, not the 2d array that np.array(d.values())
    # produces if we have a list of arrays with all the same length
    idcol = context['id']
    ids_by_group = np.empty(len(d), dtype=object)
    ids_by_group[:] = [idcol[v] for v in d.values()]

    result = dict(zip(names, keyarrays))
    result['__ids__'] = ids_by_group
    result['__len__'] = len(d)
    return result
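# Illustrative usage sketch. This example is an assumption, not part of the
# original module: it takes context to be a mapping from column names to
# aligned arrays, with an 'id' column giving each row's identifier.
#
#     context = {'age': np.array([0, 1, 0]),
#                'sex': np.array([0, 0, 1]),
#                'id': np.array([10, 11, 12])}
#     grouped = group_context(['age', 'sex'], np.ones(3, dtype=bool), context)
#     # grouped['age'] and grouped['sex'] each hold one value per group (the
#     # key of that group), grouped['__ids__'] holds one array of ids per
#     # group and grouped['__len__'] the number of groups.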