def cached_part(query, cache=None):
    """Return the cached state for the beginning of a query, plus the rest.

    The query is split into two parts: a beginning (the cache key) and a
    remainder. Candidate splits come from ``all_splits(query)`` and are
    tried in the order it yields them (presumably longest beginning first);
    the first key that the cache contains and that resolves to a non-None
    state is used.

    The result is meant to satisfy
    ``query == state.query + "/" + remainder``.

    Args:
        query: Query to look up.
        cache: Cache object to consult. When None, the global cache
            returned by ``get_cache()`` is used.

    Returns:
        A ``(state, remainder)`` tuple. When nothing is cached, the state is
        an empty ``State()`` and the remainder is the whole query.
    """
    if cache is None:
        cache = get_cache()

    if isinstance(cache, NoCache):
        # Pure optimization: nothing can be cached, so skip iterating over
        # every possible split of the query.
        return State(), encode(decode(query))

    for prefix, rest in all_splits(query):
        if prefix == "":
            # Empty beginning: nothing cached, the whole query remains.
            return State(), rest
        if not cache.contains(prefix):
            continue
        hit = cache.get(prefix)
        # The cache may report a key yet return None for it; in that case
        # keep trying the remaining splits.
        if hit is not None:
            return hit, rest

    # Should never get here, but this is a sensible default.
    return State(), encode(decode(query))
def evaluate_query_on(query, state=None, cache=None):
    """Evaluate a query on top of a supplied state and return a State.

    Unlike the evaluate function, this does not try to fetch a starting
    state from the cache; it uses the supplied ``state`` (if available).
    The output is cached in the supplied cache.

    Args:
        query: Encoded query; it is decoded and handed to
            ``evaluate_ql_on``.
        state: Optional state to evaluate the query on.
        cache: Optional cache object, passed through to ``evaluate_ql_on``.

    Returns:
        Whatever ``evaluate_ql_on`` returns for the decoded query.
    """
    return evaluate_ql_on(decode(query), state=state, cache=cache)
def qtsplit_df(state, *columns):
    """Quick/query split of a dataframe by columns (tagged version).

    This version expects the first row of the dataframe to hold tags.
    It creates a dataframe with the unique (combinations of) values found
    in the supplied columns, together with queries that obtain the
    corresponding filtered dataframes from the original dataframe.

    The resulting queries are put in the query column. Its name defaults to
    ``"query"`` and can be overridden by the ``query_column`` state
    variable.

    Args:
        state: State whose data (``state.get()``) is a dataframe with a
            tag row first, followed by the data rows.
        *columns: Names of the columns to split by.

    Returns:
        The result of ``state.with_data(sdf)``, where ``sdf`` has one
        leading row with the tags of the split columns (its query cell is
        left empty) followed by one row per unique key, in sorted order.
    """
    df = state.get()
    tags = df.iloc[0]  # first row carries the tags
    df = df.iloc[1:]  # the remaining rows are the actual data

    keys = sorted(df.groupby(by=list(columns)).groups.keys())
    if len(columns) == 1:
        # Single-column group keys are wrapped into 1-tuples so that every
        # key can be zipped with ``columns`` below.
        keys = [(x,) for x in keys]

    query_column = state.vars.get("query_column")
    if query_column is None:
        query_column = "query"

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and appending row by row copies the frame on
    # every iteration.
    rows = [{c: tags[c] for c in columns}]
    ql = decode(state.query)
    for key in keys:
        pairs = list(zip(columns, key))
        row = dict(pairs)
        # Extend the original query with a "teq" step carrying the
        # flattened column/value pairs for this key.
        row[query_column] = encode(
            ql + [["teq"] + [str(x) for p in pairs for x in p]]
        )
        rows.append(row)

    sdf = pd.DataFrame(rows, columns=list(columns) + [query_column])
    return state.with_data(sdf)