def filter(data, where):
    """
    Return the records of `data` accepted by `where`.

    :param data: the records; must support len()
    :param where: a function that accepts (record, rownum, rows) and returns
        boolean (it is passed through get() before being called)
    :return: `data` itself when it is empty or `where` is None/TRUE,
        `data.filter(where)` for a Container, otherwise the wrapped list of
        accepted (unwrapped) records
    """
    # NOTHING TO FILTER, OR NOTHING TO FILTER WITH
    if len(data) == 0 or where == None or where == TRUE:
        return data

    # CONTAINERS KNOW HOW TO FILTER THEMSELVES
    if isinstance(data, Container):
        return data.filter(where)

    if not is_container(data):
        Log.error(
            "Do not know how to handle type {{type}}",
            type=data.__class__.__name__,
        )
    else:
        predicate = get(where)
        all_rows = wrap(data)
        accepted = []
        for rownum, record in enumerate(data):
            if predicate(wrap(record), rownum, all_rows):
                accepted.append(unwrap(record))
        return wrap(accepted)

    # ONLY REACHED IF Log.error() RETURNS INSTEAD OF RAISING
    try:
        return drill_filter(where, data)
    except Exception:
        # WOW!  THIS IS INEFFICIENT!
        boxed = [DataObject(record) for record in data]
        return wrap([unwrap(match) for match in drill_filter(where, boxed)])
def sort(data, fieldnames=None, already_normalized=False):
    """
    Return `data` sorted by `fieldnames`.

    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH
    {"field":field_name, "sort":direction}

    :param data: list, or any non-text iterable, of items to sort
    :param fieldnames: sort description; an int is used as an index into each
        item; a falsy value sorts the items themselves with value_compare
    :param already_normalized: when True, `fieldnames` is used as-is instead
        of being passed through query._normalize_sort()
    :return: Null when data == None, otherwise the sorted items
    Any exception raised along the way is reported through
    Log.error("Problem sorting...") with the original as cause.
    """
    try:
        if data == None:
            return Null

        # BUILD THE LIST OF (accessor, direction) PAIRS
        if isinstance(fieldnames, int):
            accessors = [(lambda item: item[fieldnames], 1)]
        elif not fieldnames:
            # NO FIELDS GIVEN: COMPARE THE ITEMS DIRECTLY
            return wrap(sort_using_cmp(data, value_compare))
        else:
            formal = (
                fieldnames
                if already_normalized
                else query._normalize_sort(fieldnames)
            )
            accessors = [(get(clause.value), clause.sort) for clause in formal]

        def by_fields(left, right):
            # FIRST FIELD THAT DIFFERS DECIDES THE ORDER
            for accessor, direction in accessors:
                try:
                    outcome = value_compare(
                        accessor(left), accessor(right), direction
                    )
                    if outcome != 0:
                        return outcome
                except Exception as e:
                    Log.error("problem with compare", e)
            return 0

        if is_list(data):
            ordered = sort_using_cmp(data, cmp=by_fields)
            output = FlatList([unwrap(item) for item in ordered])
        elif is_text(data):
            Log.error("Do not know how to handle")
        elif hasattr(data, "__iter__"):
            # MATERIALIZE THE ITERABLE BEFORE SORTING
            ordered = sort_using_cmp(list(data), cmp=by_fields)
            output = FlatList([unwrap(item) for item in ordered])
        else:
            Log.error("Do not know how to handle")
            output = None

        return output
    except Exception as e:
        Log.error("Problem sorting\n{{data}}", data=data, cause=e)
def window(data, param):
    """
    Calculate a window-function column and assign it to each record.

    MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not
    graceful with numpy)

    :param data: list of records
    :param param: window description; the attributes read here are
        name      - column to assign window function result
        edges     - columns to group by (each read as edge.value.var)
        where     - passed to filter() before anything else is done
        sort      - columns to sort by
        value     - given to get() to obtain a function called with
                    (record, rownum, rows) that returns a value
        aggregate - WindowFunction to apply; called with no arguments to
                    make an accumulator offering add(), sub() and end()
        range     - of form {"min":-10, "max":0} to specify the size and
                    relative position of window (stop/start are used when
                    max/min are missing)
    :return: None; results are assigned to record[name]
    """
    column = param.name
    grouping = param.edges
    row_filter = param.where
    ordering = param.sort
    value_of = get(param.value)
    make_accumulator = param.aggregate
    bounds = param.range

    def assign_values(rows, target):
        # STORE THE CALCULATED VALUE OF EVERY ROW UNDER target
        for position, row in enumerate(rows):
            row[target] = value_of(row, position, rows)

    data = filter(data, row_filter)

    if not (make_accumulator or grouping):
        if ordering:
            data = sort(data, ordering, already_normalized=True)
        # SIMPLE CALCULATED VALUE
        try:
            assign_values(data, column)
        except Exception as cause:
            raise cause
        return

    try:
        group_keys = [edge.value.var for edge in grouping]
    except Exception as cause:
        raise Log.error("can only support simple variable edges", cause=cause)

    if not make_accumulator or make_accumulator == "none":
        # GROUPED, BUT NO AGGREGATION: CALCULATE PER GROUP
        for _, members in groupby(data, group_keys):
            if not members:
                continue  # CAN DO NOTHING WITH THIS ZERO-SAMPLE

            ordered = (
                sort(members, ordering, already_normalized=True)
                if ordering
                else members
            )
            assign_values(ordered, column)
        return

    for _, members in groupby(data, group_keys):
        if not members:
            continue  # CAN DO NOTHING WITH THIS ZERO-SAMPLE

        ordered = sort(members, ordering)
        assign_values(ordered, "__temp__")

        upper = coalesce(bounds.max, bounds.stop)
        lower = coalesce(bounds.min, bounds.start)

        # PRELOAD THE ACCUMULATOR WITH THE FIRST WINDOW
        accumulator = make_accumulator()
        for offset in range(lower, upper):
            accumulator.add(ordered[offset].__temp__)

        # WINDOW FUNCTION APPLICATION: READ THE RESULT, THEN SLIDE BY ONE
        for position, row in enumerate(ordered):
            row[column] = accumulator.end()
            accumulator.add(ordered[position + upper].__temp__)
            accumulator.sub(ordered[position + lower].__temp__)

    # CLEANUP
    for row in data:
        row["__temp__"] = None