def test_parse_serialize():
    """Check the node types produced by `parse` and the `serialize` round trip."""
    parsed = parse('(fun 1 "foo" nil #f #:c 5 (zogzog 42 #t))')
    typenames = [node.__class__.__name__ for node in parsed]
    assert typenames == [
        'Symbol', 'int', 'str', 'NoneType', 'bool', 'Keyword', 'int', 'list'
    ]

    # serializing a parsed expression must give back the source text
    roundtrips = (
        '(fun 1 #t #f #:c "babar")',
        '(fun 1 nil #:c (+ 5.0 7))',
    )
    for source in roundtrips:
        assert serialize(parse(source)) == source
def test_things():
    """Smoke-test evaluation, parsing and the repr of symbols and keywords."""
    bindings = {
        '+': op.add,
        '*': op.mul,
        'today': date.today,
    }
    env = Env(bindings)

    total = evaluate('(+ 3 5)', env)
    assert total == 8
    # only checks that a zero-argument call goes through
    print(evaluate('(today)', env))

    nested = parse('(+ 3 (* 4 5))')
    assert nested == ['+', 3, ['*', 4, 5]]
    booleans = parse('(and #t #f)')
    assert booleans == ['and', True, False]

    assert repr(Symbol('s')) == "'s"
    assert repr(Keyword('k')) == '#:k'
def test_failing_kw(engine, tsh):
    """A keyword argument of the wrong type must be rejected by `typecheck`."""
    interp = Interpreter(engine, tsh, {})
    source = '(+ 1 (series "types-a" #:fill "ffill" #:prune "toto"))'

    with pytest.raises(TypeError) as excinfo:
        typecheck(lisp.parse(source), interp.env)

    message = excinfo.value.args[0]
    assert message == "keyword `prune` = 'toto' not of typing.Union[int, NoneType]"
def test_complex_typecheck(engine, tsh):
    """A well-typed nested expression must pass `typecheck` without raising."""
    interp = Interpreter(engine, tsh, {})
    source = (
        '(add (series "types-a") '
        ' (priority (series "types-a") '
        ' (* 2 (series "types-b"))))'
    )
    tree = lisp.parse(source)
    typecheck(tree, interp.env)
def test_basic_typecheck():
    """Type-check calls to annotated operators, with good and bad arguments."""

    def plus(a: int, b: int) -> int:
        return a + b

    def mul(a: int, b: int) -> int:
        return a * b

    # a single operator: well-typed call, then a str where an int is expected
    additive = lisp.Env({'+': plus})
    typecheck(lisp.parse('(+ 3 4)'), env=additive)
    with pytest.raises(TypeError):
        typecheck(lisp.parse('(+ 3 "hello")'), env=additive)

    # the same type error, nested inside another operator call
    arithmetic = lisp.Env({'+': plus, '*': mul})
    with pytest.raises(TypeError):
        typecheck(lisp.parse('(* 2 (+ 3 "hello"))'), env=arithmetic)
def test_evaluator():
    """Exercise evaluation, syntax errors and constant folding."""
    addition = '(+ 2 3)'

    # the operator is not bound in an empty environment
    with pytest.raises(LookupError):
        lisp.evaluate(addition, lisp.Env())

    env = lisp.Env({'+': lambda a, b: a + b})
    assert lisp.evaluate(addition, env) == 5

    # unbalanced parenthesis
    with pytest.raises(SyntaxError):
        lisp.parse('(+ 2 3')

    # a fully constant expression folds down to a single value
    folded = constant_fold(lisp.parse('(+ (* 8 (/ 5. 2)) 1.1)'))
    assert folded == 21.1

    # only the constant sub-expression is folded
    partial = constant_fold(lisp.parse('(+ (* 8 (/ 5. 2)) (series "foo"))'))
    assert partial == ['+', 20.0, ['series', 'foo']]
def test_failing_arg(engine, tsh):
    """A positional argument of the wrong type must be rejected by `typecheck`."""
    interp = Interpreter(engine, tsh, {})
    source = (
        '(add (series "types-a") '
        ' (priority (series "types-a") '
        ' (* "toto" (series "types-b"))))'
    )

    with pytest.raises(TypeError) as excinfo:
        typecheck(lisp.parse(source), interp.env)

    message = excinfo.value.args[0]
    assert message == "'toto' not of <class 'numbers.Number'>"
def typecheck_formula(db_uri, pdbshell=False, namespace='tsh'):
    """Type-check every formula registered in the `namespace` time series store.

    For each formula, prints its name and the head of its parsed tree, then
    runs `typecheck` on it.  `pdbshell` is accepted but not used here.
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    interp = Interpreter(engine, tsh, {})

    formula_names = (
        sname
        for sname, kind in tsh.list_series(engine).items()
        if kind == 'formula'
    )
    for name in formula_names:
        parsed = parse(tsh.formula(engine, name))
        print(name, f'`{parsed[0]}`')
        typecheck(parsed, env=interp.env)
def rename(self, cn, oldname, newname):
    """Rename `oldname` to `newname` and update the formulas referencing it.

    Every stored formula is parsed.  Those whose series (as reported by
    `find_series`) include `oldname` get their `series` calls rewritten to
    use `newname`.  Then the series itself is renamed: in the formula table
    when it is a formula, through the parent class otherwise.

    All formulas are validated before any row is written, so a conflict
    never leaves some formulas rewritten and others not.

    Raises:
        ValueError: if `newname` is already referenced by a formula.
    """
    # read all formulas and parse them ...
    formulas = cn.execute(
        f'select name, text from "{self.namespace}".formula').fetchall()

    def edit(tree):
        # replace `oldname` by `newname` where it directly follows
        # the `series` operator
        newtree = []
        after_series = False
        for node in tree:
            if isinstance(node, list):
                newtree.append(edit(node))
                continue
            if node == 'series':
                after_series = True
                newtree.append(node)
                continue
            if after_series and node == oldname:
                node = newname
            newtree.append(node)
            after_series = False
        return newtree

    # first pass: detect conflicts and collect what has to be rewritten
    errors = []
    pending = []
    for fname, text in formulas:
        tree = parse(text)
        seriesmeta = self.find_series(cn, tree)
        if newname in seriesmeta:
            errors.append(fname)
        if oldname in seriesmeta:
            pending.append((fname, tree))

    if errors:
        raise ValueError(
            f'new name is already referenced by `{",".join(errors)}`')

    # second pass: nothing conflicts, the formulas can be rewritten
    sql = (f'update "{self.namespace}".formula '
           'set text = %(text)s '
           'where name = %(name)s')
    for fname, tree in pending:
        cn.execute(sql, text=serialize(edit(tree)), name=fname)

    if self.type(cn, oldname) == 'formula':
        cn.execute(
            f'update "{self.namespace}".formula '
            'set name = %(newname)s '
            'where name = %(oldname)s',
            oldname=oldname, newname=newname)
    else:
        super().rename(cn, oldname, newname)
def fix_slice(db_uri, really=False, namespace='tsh'):
    """Apply `rewrite_slice` to every formula and report the ones it changes.

    The rewritten formulas are registered only when `really` is true;
    otherwise the changes are just printed.
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)

    for name, kind in tsh.list_series(engine).items():
        if kind != 'formula':
            continue

        # parse+serialize -> normalization step
        before = serialize(parse(tsh.formula(engine, name)))
        after = serialize(rewrite_slice(parse(before)))
        if before == after:
            continue

        print('rewritten', name)
        print(' was', before)
        print(' ->', after)
        if really:
            tsh.register_formula(engine, name, after, update=True)

    if not really:
        print('UNCHANGED. To apply changes, pass --really')
def insertion_dates(self, cn, name, fromdate=None, todate=None):
    """Return the sorted insertion dates of `name`.

    A formula gets the de-duplicated union of the revisions of the series
    found in it by `find_series`; anything else is handled by the parent.
    """
    if self.type(cn, name) == 'formula':
        tree = parse(self.formula(cn, name))
        revisions = set()
        for sname in self.find_series(cn, tree):
            revisions.update(
                self._revisions(cn, sname,
                                from_insertion_date=fromdate,
                                to_insertion_date=todate)
            )
        return sorted(revisions)

    return super().insertion_dates(cn, name,
                                   fromdate=fromdate,
                                   todate=todate)
def justdoit():
    """Compute and store the metadata of every formula.

    Uses `tsh`, `engine`, `errors` and `todo` from the enclosing scope.
    Formulas whose metadata cannot be filtered go to `errors`; those with
    no metadata or no `index_dtype` go to `todo`.
    """
    formula_names = (
        sname
        for sname, kind in tsh.list_series(engine).items()
        if kind == 'formula'
    )
    for name in formula_names:
        print(name)
        seriesmap = tsh.find_series(engine, parse(tsh.formula(engine, name)))
        try:
            meta = tsh.filter_metadata(seriesmap)
        except ValueError as err:
            errors.append((name, err))
            continue

        if meta and 'index_dtype' in meta:
            tsh.update_metadata(engine, name, meta)
        else:
            todo.append(name)
            print(' -> todo')
def has_compatible_operators(cn, tsh, tree, good_operators):
    """Tell whether `tree` only uses operators from `good_operators`.

    The whole tree is walked: the head of every nested list must be in
    `good_operators`, at any depth.  Series found by `tsh.find_series` that
    are themselves formulas are parsed and checked recursively.

    Returns:
        True when every operator is compatible, False otherwise.
    """
    # depth-first walk; visiting order is irrelevant since a single
    # incompatible operator is enough to answer False
    pending = [tree]
    while pending:
        node = pending.pop()
        if node[0] not in good_operators:
            return False
        pending.extend(
            param for param in node[1:]
            if isinstance(param, list)
        )

    # formulas referenced by this one must be compatible too
    for name in tsh.find_series(cn, tree):
        formula = tsh.formula(cn, name)
        if not formula:
            continue
        if not has_compatible_operators(
                cn, tsh, parse(formula), good_operators):
            return False

    return True
def __init__(self, tsa, seriesname, getargs):
    """Collect the formula `seriesname` and the series found in it.

    `self.infos` is a list of dicts with the keys `name`, `ts` and `type`.
    The formula itself comes first, followed by one entry per series
    returned by `find_series` on its parsed tree.

    Args:
        tsa: object providing `exists`, `formula`, `get`, `type`, `tsh`
            and `engine`.
        seriesname: name of the formula; it must exist.
        getargs: keyword arguments forwarded to every `tsa.get` call.
    """
    assert tsa.exists(seriesname)
    tree = parse(tsa.formula(seriesname))

    def get(name):
        ts = tsa.get(name, **getargs)
        if ts is None:
            # explicit dtype: the default dtype of an empty Series
            # depends on the pandas version
            return pd.Series(name=name, dtype='float64')
        return ts

    self.infos = [
        {
            'name': name,
            'ts': get(name),
            'type': tsa.type(name)
        }
        for name in tsa.tsh.find_series(tsa.engine, tree)
    ]
    self.infos.insert(0, {
        'name': seriesname,
        'ts': get(seriesname),
        'type': 'formula'
    })
def drop_alias_tables(db_uri, drop=False, namespace='tsh'):
    """Rewrite formulas that use series with outlier bounds, then drop tables.

    Formulas referencing a series listed in the `outliers` table are passed
    through `rewrite` and registered again.  The `arithmetic`, `priority`
    and `outliers` tables are dropped only when `drop` is true.

    All queries target the `namespace` schema.
    """
    engine = create_engine(find_dburi(db_uri))

    # convert outliers to clip operator
    rows = engine.execute(
        f'select serie, min, max from "{namespace}".outliers'
    ).fetchall()
    elts = {
        serie: (lower, upper)
        for serie, lower, upper in rows
    }

    tsh = timeseries(namespace)
    rewriteme = []
    for name, kind in tsh.list_series(engine).items():
        if kind != 'formula':
            continue
        tree = parse(tsh.formula(engine, name))
        smap = tsh.find_series(engine, tree)
        if any(sname in elts for sname in smap):
            rewriteme.append((name, tree))

    for name, tree in rewriteme:
        tree2 = rewrite(tree, elts)
        print(name)
        print(serialize(tree))
        print('->')
        print(serialize(tree2))
        print()
        tsh.register_formula(engine, name, serialize(tree2), update=True)

    if not drop:
        print('DID NOT DROP the tables')
        print('pass --drop to really drop them')
        return

    with engine.begin() as cn:
        cn.execute(f'drop table if exists "{namespace}".arithmetic')
        cn.execute(f'drop table if exists "{namespace}".priority')
        cn.execute(f'drop table if exists "{namespace}".outliers')
def register_formula(self, cn, name, formula, reject_unknown=True, update=False):
    """Validate `formula` and store it under `name`.

    The formula is parsed and stored in its serialized form.  It is then
    checked for unknown series (unless `reject_unknown` is false), unknown
    operators and type errors.  Metadata derived from its series is saved
    when there is any.

    Raises:
        AssertionError: `name` is already a formula and `update` is false.
        TypeError: `name` is an existing primary series.
        ValueError: the formula refers to unknown series or operators.
    """
    if not update:
        assert not self.formula(cn, name), f'`{name}` already exists'

    is_primary = self.exists(cn, name) and self.type(cn, name) == 'primary'
    if is_primary:
        raise TypeError(
            f'primary series `{name}` cannot be overriden by a formula')

    # basic syntax check
    tree = parse(formula)
    formula = serialize(tree)

    # build metadata & check compat
    seriesmeta = self.find_series(cn, tree)
    if reject_unknown:
        badseries = [sname for sname, smeta in seriesmeta.items() if not smeta]
        if badseries:
            raise ValueError(f'Formula `{name}` refers to unknown series '
                             f'{", ".join("`%s`" % s for s in badseries)}')

    # bad operators
    badoperators = [
        opname
        for opname, func in self.find_operators(cn, tree).items()
        if func is None
    ]
    if badoperators:
        raise ValueError(f'Formula `{name}` refers to unknown operators '
                         f'{", ".join("`%s`" % o for o in badoperators)}')

    # type checking
    interp = interpreter.Interpreter(cn, self, {})
    helper.typecheck(tree, env=interp.env)

    meta = self.filter_metadata(seriesmeta)
    upsert = (f'insert into "{self.namespace}".formula '
              '(name, text) '
              'values (%(name)s, %(text)s) '
              'on conflict (name) do update '
              'set text = %(text)s')
    cn.execute(upsert, name=name, text=formula)
    if meta:
        self.update_metadata(cn, name, meta, internal=True)
def staircase(self, cn, name, delta, from_value_date=None, to_value_date=None):
    """Compute the staircase of `name` for `delta`.

    A formula using only operators from `self.fast_staircase_operators` is
    evaluated through `FastStaircaseInterpreter`.  Everything else goes to
    the parent implementation.
    """
    formula = self.formula(cn, name)
    if formula and interpreter.has_compatible_operators(
            cn, self, parse(formula), self.fast_staircase_operators):
        # go fast
        bounds = {
            'from_value_date': from_value_date,
            'to_value_date': to_value_date
        }
        fast = interpreter.FastStaircaseInterpreter(cn, self, bounds, delta)
        return self.get(
            cn, name,
            from_value_date=from_value_date,
            to_value_date=to_value_date,
            __interpreter__=fast
        )

    return super().staircase(cn, name, delta, from_value_date, to_value_date)
def expanded(tsh, cn, tree):
    """Return `tree` with expandable formula references replaced by their trees.

    When the finder registered for the head operator reports exactly one
    series, that series is flagged `expandable` and is a formula, the
    expansion of its parsed formula is returned.  Otherwise a copy of
    `tree` is returned with every nested list expanded.
    """
    # base case: check the current operation
    finder = FINDERS.get(tree[0])
    seriesmeta = finder(cn, tsh, tree) if finder else None

    # hidden assumption: true series operators
    # operate one series at a time (e.g. `series`)
    # hence we can be brutal ...
    # (more than one series: unexpandable)
    if seriesmeta and len(seriesmeta) == 1:
        name, meta = seriesmeta.popitem()
        if meta.get('expandable', False) and tsh.type(cn, name) == 'formula':
            return expanded(tsh, cn, parse(tsh.formula(cn, name)))

    return [
        expanded(tsh, cn, item) if isinstance(item, list) else item
        for item in tree
    ]
def test_rewrite_slice():
    """A string `#:fromdate` of `slice` must be wrapped in a `date` call."""
    source = '(* 3 (slice (series "42") #:fromdate "2020-1-1" #:todate (today)))'
    expected = (
        '(* 3 (slice (series "42") #:fromdate (date "2020-1-1") #:todate (today)))'
    )
    rewritten = rewrite_slice(parse(source))
    assert serialize(rewritten) == expected
def test_kw_subexpr(engine, tsh):
    """A keyword argument given as a sub-expression must pass `typecheck`."""
    interp = Interpreter(engine, tsh, {})
    tree = lisp.parse('(+ 1 (series "types-a" #:prune (+ 1 2)))')
    typecheck(tree, interp.env)
def history(self, cn, name,
            from_insertion_date=None,
            to_insertion_date=None,
            from_value_date=None,
            to_value_date=None,
            diffmode=False,
            _keep_nans=False,
            _tree=None):
    """Return the history of `name`, formulas included.

    Three cases are handled:

    * `name` is None: `_tree` (required) is evaluated through
      `interpreter.OperatorHistory`;
    * `name` is not a formula: the parent `history` is used, falling back
      to `self.othersources` when the parent returns None and other
      sources are configured;
    * `name` is a formula: the histories of the series found in it and of
      the call sites of the operators listed in `HISTORY` are collected,
      then the formula is evaluated once per key found in these histories
      (presumably insertion dates -- confirm against the parent class).

    `diffmode` is not supported for formulas (asserted).

    Returns:
        For a formula, a dict mapping each collected key, in sorted order,
        to the result of evaluating the formula for it.  Otherwise
        whatever the delegated call returns.
    """
    if self.type(cn, name) != 'formula':
        # autotrophic operator ?
        if name is None:
            assert _tree
            i = interpreter.OperatorHistory(
                cn, self, {
                    'from_value_date': from_value_date,
                    'to_value_date': to_value_date,
                    'from_insertion_date': from_insertion_date,
                    'to_insertion_date': to_insertion_date,
                    'diffmode': diffmode,
                    '_keep_nans': _keep_nans
                })
            return i.evaluate_history(_tree)

        # normal series ?
        hist = super().history(cn, name,
                               from_insertion_date,
                               to_insertion_date,
                               from_value_date,
                               to_value_date,
                               diffmode,
                               _keep_nans)

        # alternative source ?
        if hist is None and self.othersources:
            hist = self.othersources.history(
                name,
                from_value_date=from_value_date,
                to_value_date=to_value_date,
                from_insertion_date=from_insertion_date,
                to_insertion_date=to_insertion_date,
                _keep_nans=_keep_nans)
        return hist

    assert not diffmode
    formula = self.formula(cn, name)
    tree = parse(formula)
    series = self.find_series(cn, tree)

    # normal history
    # (recursive call: a component series may itself be a formula; the
    # comprehension variable `name` is local to the comprehension)
    histmap = {
        name: self.history(cn, name,
                           from_insertion_date,
                           to_insertion_date,
                           from_value_date,
                           to_value_date,
                           diffmode) or {}
        for name in series
    }

    # prepare work for autotrophic operator history
    callsites = []
    for sname in HISTORY:
        for call in self.find_callsites(cn, sname, tree):
            callsites.append(call)

    # autotrophic history
    # NOTE(review): the key below is the `name` parameter (the formula's
    # own name), identical for every call site, so several call sites
    # would overwrite each other -- confirm this is intended
    histmap.update({
        name: self.history(
            cn,
            None,  # just mark that we won't work "by name" there
            from_insertion_date,
            to_insertion_date,
            from_value_date,
            to_value_date,
            diffmode,
            _tree=callsite) or {}
        for callsite in callsites
    })

    i = interpreter.HistoryInterpreter(
        cn, self, {
            'from_value_date': from_value_date,
            'to_value_date': to_value_date
        },
        histories=histmap)
    # union of the keys of all the collected histories
    idates = {idate for hist in histmap.values() for idate in hist}
    return {
        idate: i.evaluate(formula, idate, name)
        for idate in sorted(idates)
    }
def expanded_formula(self, cn, name):
    """Return the text of formula `name` after expansion by `helper.expanded`."""
    tree = parse(self.formula(cn, name))
    fulltree = helper.expanded(self, cn, tree)
    return serialize(fulltree)