Esempio n. 1
0
def fix_supervision_status(dburi, name=None, namespace='tsh'):
    """backfill the `supervision_status` metadata entry of series

    Series whose metadata already holds a `supervision_status` key are
    skipped. For the others, the status is written into the metadata
    and, when it is `handcrafted` or `unsupervised`, the upstream series
    is deleted if it exists. Per-status counts are printed at the end.

    dburi: database uri, resolved through `find_dburi`
    name: restrict the run to this single series; by default every
          series listed with the `primary` type is processed
    namespace: namespace handed to `timeseries`
    """
    engine = create_engine(find_dburi(dburi))
    tsh = timeseries(namespace)
    if name:
        series = [name]
    else:
        series = [
            sname for sname, stype in tsh.list_series(engine).items()
            if stype == 'primary'
        ]

    categories = defaultdict(list)

    with tqdm.tqdm(total=len(series)) as bar:
        for sname in series:
            meta = tsh.metadata(engine, sname)
            if 'supervision_status' in meta:
                # skipped series still count as processed, otherwise
                # the bar never reaches its total
                bar.update()
                continue

            # NOTE(review): `status` is not assigned anywhere in this
            # function as shown -- unless it is a module-level name this
            # line raises NameError; the computation of the status has to
            # be restored here -- TODO confirm
            categories[status].append(sname)
            meta['supervision_status'] = status
            with engine.begin() as cn:
                tsh.update_metadata(cn, sname, meta, internal=True)

                # reclaim space
                if status in ('handcrafted', 'unsupervised'):
                    if tsh.upstream.exists(cn, sname):
                        tsh.upstream.delete(cn, sname)

            bar.update()

    print('unsupervised', len(categories['unsupervised']))
    print('handcrafted', len(categories['handcrafted']))
    print('supervised', len(categories['supervised']))
Esempio n. 2
0
def view(db_uri, handler, debug=False):
    """visualize time series through the web

    db_uri: database uri, resolved through `find_dburi`
    handler: passed to `tshclass` to obtain the series handler
    debug: run the web application in the foreground instead of a
           background thread (no browser is opened in that case)
    """
    dburi = find_dburi(db_uri)
    address = host()
    portnum = int(getenv('TSVIEW_PORT', 5678))
    serieshandler = tshclass(handler)

    if debug:
        # foreground mode: returns right after `kickoff` does
        kickoff(address, portnum, dburi, serieshandler)
        return

    webapp = Thread(
        name='tsview.webapp',
        target=kickoff,
        kwargs=dict(
            host=address,
            port=portnum,
            dburi=dburi,
            handler=serieshandler,
        ),
    )
    webapp.daemon = True
    webapp.start()

    webbrowser.open(f'http://{address}:{portnum}/tsview')
    # wait for a line on stdin before returning
    input()
Esempio n. 3
0
def check(db_uri, series=None, namespace='tsh'):
    """coherence checks of the db

    For each series, the whole history is read and the smallest and
    largest index values over all revisions are compared with the
    bounds returned by `tsh.interval`; discrepancies are printed. A
    summary line (number of revisions, elapsed time, monotonicity
    warning) is printed for every series.

    db_uri: database uri, resolved through `find_dburi`
    series: name of a single series to check (default: all series
            returned by `tsh.list_series`)
    namespace: namespace handed to `timeseries`
    """
    e = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    if series is None:
        series = tsh.list_series(e)
    else:
        series = [series]

    for idx, s in enumerate(series):
        t0 = time()
        with e.begin() as cn:
            hist = tsh.history(cn, s)
        start, end = None, None
        mon = True
        for ts in hist.values():
            cmin = ts.index.min()
            cmax = ts.index.max()
            start = min(start or cmin, cmin)
            end = max(end or cmax, cmax)
            # accumulate: one non-monotonic revision flags the series,
            # whatever the state of the revisions that follow it
            mon = mon and ts.index.is_monotonic_increasing
        ival = tsh.interval(e, s)
        if ival.left != start:
            print('  start:', s, f'{ival.left} != {start}')
        if ival.right != end:
            print('  end:', s, f'{ival.right} != {end}')
        monmsg = '' if mon else 'non-monotonic'
        print(
            idx, s,
            'inserts={}, read-time={} {}'.format(len(hist),
                                                 time() - t0, monmsg))
Esempio n. 4
0
def info(db_uri, namespace='tsh'):
    """show global statistics of the repository

    db_uri: database uri, resolved through `find_dburi`
    namespace: namespace handed to `timeseries`
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)

    stats = tsh.info(engine)
    # collapse the collection of names into a single comma separated
    # string before feeding the text template
    names = stats['serie names']
    stats['serie names'] = ', '.join(names)
    report = INFOFMT.format(**stats)
    print(report)
Esempio n. 5
0
def verify_aliases(dburi, only=None, namespace='tsh'):
    """verify aliases wholesale (all or per type using --only)

    Every distinct name found in the selected alias tables is fetched.
    An `AliasError` is printed and the name is skipped; otherwise a
    warning is printed if the index is not monotonic increasing,
    followed by the name and the series length.

    dburi: database uri, resolved through `find_dburi`
    only: restrict the check to one table (must be a member of `TABLES`)
    namespace: prefix of the `<namespace>-alias` schema, also handed to
               `tsio.timeseries`
    """
    if only is None:
        tables = TABLES
    else:
        assert only in TABLES
        tables = [only]

    engine = create_engine(find_dburi(dburi))
    tsh = tsio.timeseries(namespace=namespace)
    for table in tables:
        # the outliers table names its key column differently
        colname = 'serie' if table == 'outliers' else 'alias'
        rows = engine.execute(
            f'select distinct {colname} from "{namespace}-alias"."{table}"'
        ).fetchall()
        for row in rows:
            name = row[0]
            try:
                series = tsh.get(engine, name)
            except tsio.AliasError as err:
                print(err)
                # nothing was fetched: do not fall through to the length
                # report, which would hit an unbound or stale `series`
                continue

            if not series.index.is_monotonic_increasing:
                print(name, 'is non monotonic')

            print(name, len(series))
Esempio n. 6
0
def audit_aliases(dburi, alias=None, namespace='tsh'):
    """perform a visual audit of aliases

    dburi: database uri, resolved through `find_dburi`
    alias: audit this alias only; it is ignored when `_alias_kind`
           returns None for it. By default all the aliases of the
           priority and arithmetic tables are audited.
    namespace: prefix of the `<namespace>-alias` schema, also handed to
               `tsio.timeseries`
    """
    engine = create_engine(find_dburi(dburi))

    if alias:
        # verify
        known = _alias_kind(engine, namespace, alias) is not None
        names = [alias] if known else []
    else:
        names = []
        for kind in ('priority', 'arithmetic'):
            rows = engine.execute(
                f'select distinct alias from "{namespace}-alias".{kind}'
            ).fetchall()
            names.extend(found for found, in rows)

    tsh = tsio.timeseries(namespace=namespace)

    # build everything first ...
    trees = [
        helpers.buildtree(engine, tsh, aliasname, [])
        for aliasname in names
    ]

    # ... then display, one ruler line per tree
    ruler = '-' * 70
    for tree in trees:
        print(ruler)
        helpers.showtree(tree)
Esempio n. 7
0
def remove_alias(dburi, alias_type, alias, namespace='tsh'):
    """remove a single alias

    dburi: database uri, resolved through `find_dburi`
    alias_type: name of the table (within the `<namespace>-alias`
                schema) to delete from
    alias: value matched against the `alias` column
    namespace: prefix of the `<namespace>-alias` schema
    """
    engine = create_engine(find_dburi(dburi))
    # only the alias value is passed as a bound parameter; schema and
    # table names are interpolated into the statement text
    target = f'"{namespace}-alias".{alias_type}'
    query = f'delete from {target} where alias = %(alias)s'
    with engine.begin() as cn:
        cn.execute(query, alias=alias)
Esempio n. 8
0
def history(db_uri,
            seriename,
            from_insertion_date,
            to_insertion_date,
            from_value_date,
            to_value_date,
            diff,
            json,
            namespace='tsh'):
    """show a serie full history

    The four date bounds are forwarded as-is to `tsh.history` and `diff`
    is forwarded as its `diffmode`. With `json`, a single json document
    is printed, mapping each stringified insertion date to a mapping of
    stringified value dates to values; otherwise each revision is
    printed in turn.
    """
    engine = create_engine(find_dburi(db_uri))

    tsh = timeseries(namespace)
    with engine.begin() as cn:
        revisions = tsh.history(
            cn,
            seriename,
            from_insertion_date,
            to_insertion_date,
            from_value_date,
            to_value_date,
            diffmode=diff,
        )

    if not json:
        for idate in revisions:
            print(revisions[idate])
        return

    # json wants string keys at both levels
    out = {}
    for idate, ts in revisions.items():
        out[str(idate)] = {
            str(vdate): val
            for vdate, val in ts.to_dict().items()
        }
    print(dumps(out))
Esempio n. 9
0
def test_formula(db_uri, formula, pdbshell=False, namespace='tsh'):
    """evaluate a formula and print the result

    db_uri: database uri, resolved through `find_dburi`
    formula: handed to `tsh.eval_formula`
    pdbshell: open an ipdb session once the result has been printed
    namespace: namespace handed to `timeseries`
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)

    ts = tsh.eval_formula(engine, formula)
    print(ts)
    if not pdbshell:
        return

    # the local names above are left untouched: they are what is
    # visible from the debugger prompt
    import ipdb
    ipdb.set_trace()
Esempio n. 10
0
def migrate_dot_four_to_dot_five(dburi, namespace='tsh'):
    """move the alias tables into the main schema

    The `outliers`, `arithmetic` and `priority` tables are moved from
    the `<namespace>-alias` schema to the `<namespace>` schema, then the
    alias schema is dropped.

    dburi: database uri, resolved through `find_dburi`
    namespace: name of the target schema and prefix of the alias schema
    """
    engine = create_engine(find_dburi(dburi))

    # everything happens within one `engine.begin()` block so that a
    # failure does not leave the tables spread over the two schemas
    # (assumes the backend handles these statements transactionally,
    # as PostgreSQL does -- TODO confirm)
    with engine.begin() as cn:
        for table in ('outliers', 'arithmetic', 'priority'):
            # the target schema is quoted like the source one, so that
            # both are subject to the same identifier rules
            cn.execute(
                f'alter table "{namespace}-alias".{table} '
                f'set schema "{namespace}"'
            )
        cn.execute(f'drop schema "{namespace}-alias"')
Esempio n. 11
0
def export_aliases(dburi, aliases, namespace='tsh'):
    """export aliases and what they depend on into csv files

    The dependency tree of every requested alias is walked; the names
    met along the way are sorted by kind and written in the current
    directory:

    * `primary.csv`: the leaf series names, one per line
    * `arith.csv`: the rows of the arithmetic table for the collected
      arithmetic aliases
    * `prio.csv`: the rows of the priority table for the collected
      priority aliases

    Leaves whose name starts with `unknown` are reported and skipped.

    dburi: database uri, resolved through `find_dburi`
    aliases: iterable of alias names to export
    namespace: prefix of the `<namespace>-alias` schema, also handed to
               `tsio.timeseries`
    """
    engine = create_engine(find_dburi(dburi))
    tsh = tsio.timeseries(namespace=namespace)

    trees = [
        helpers.buildtree(engine, tsh, alias, [])
        for alias in aliases
    ]

    data = {'primary': set(), 'arithmetic': set(), 'priority': set()}

    def collect(tree):
        # a leaf is a plain string, a node is a dict keyed by
        # (alias, kind) tuples
        if isinstance(tree, str):
            if tree.startswith('unknown'):
                print(f'skipping {tree}')
            else:
                data['primary'].add(tree)
            return

        for (alias, kind), subtrees in tree.items():
            data[kind].add(alias)
            for subtree in subtrees:
                collect(subtree)

    for tree in trees:
        collect(tree)

    # names are sorted everywhere below: set iteration order is not
    # stable, which made the output files differ from run to run
    Path('primary.csv').write_bytes(
        '\n'.join(sorted(data['primary'])).encode('utf-8')
    )

    arith = []
    for name in sorted(data['arithmetic']):
        out = engine.execute(
            f'select alias, serie, coefficient, fillopt '
            f'from "{namespace}-alias".arithmetic '
            f'where alias = %(name)s '
            f'order by alias',
            name=name).fetchall()
        arith.extend(dict(row) for row in out)

    # the columns are given to the frame itself: with no rows at all a
    # bare frame has no columns and selecting them in `to_csv` fails
    df = pd.DataFrame(
        arith,
        columns=['alias', 'serie', 'coefficient', 'fillopt']
    )
    df.to_csv(Path('arith.csv'), index=False)

    prio = []
    for name in sorted(data['priority']):
        out = engine.execute(
            f'select alias, serie, priority, coefficient, prune '
            f'from "{namespace}-alias".priority '
            f'where alias = %(name)s '
            f'order by alias, priority asc',
            name=name).fetchall()
        prio.extend(dict(row) for row in out)

    df = pd.DataFrame(
        prio,
        columns=['alias', 'serie', 'priority', 'coefficient', 'prune']
    )
    df.to_csv(Path('prio.csv'), index=False)
Esempio n. 12
0
def get(db_uri, seriename, json, namespace='tsh'):
    """show a serie in its current state

    db_uri: database uri, resolved through `find_dburi`
    seriename: name handed to `tsh.get`
    json: print the output of `to_json()` rather than the plain
          representation
    namespace: namespace handed to `timeseries`
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)

    current = tsh.get(engine, seriename)
    if json:
        print(current.to_json())
        return

    # no row limit for the duration of the print
    display = ('display.max_rows', None, 'display.max_columns', 3)
    with pd.option_context(*display):
        print(current)
Esempio n. 13
0
def rename(db_uri, mapfile, namespace='tsh'):
    """rename series by providing a map file (csv format)

    map file header must be `old,new`

    Each rename runs in its own `engine.begin()` block. If an `old`
    name shows up several times in the file, the last row wins.
    """
    renames = {}
    for pair in pd.read_csv(mapfile).itertuples():
        renames[pair.old] = pair.new

    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    for oldname, newname in renames.items():
        with engine.begin() as cn:
            print('rename', oldname, '->', newname)
            tsh.rename(cn, oldname, newname)
Esempio n. 14
0
def reset_aliases(dburi, only=None, namespace='tsh'):
    """remove aliases wholesale (all or per type using --only)

    dburi: database uri, resolved through `find_dburi`
    only: empty this table only (must be a member of `TABLES`);
          by default all of `TABLES` are emptied
    namespace: prefix of the `<namespace>-alias` schema
    """
    if only is not None:
        assert only in TABLES
    tables = TABLES if only is None else [only]

    engine = create_engine(find_dburi(dburi))
    # one `engine.begin()` block per table
    for table in tables:
        query = f'delete from "{namespace}-alias"."{table}"'
        with engine.begin() as cn:
            cn.execute(query)
Esempio n. 15
0
def ingest_formulas(dburi, formula_file, strict=False, namespace='tsh'):
    """ingest a csv file of formulas

    Must be a two-columns file with a header "name,text"

    All the formulas are registered within a single `engine.begin()`
    block; `strict` is forwarded to `tsh.register_formula`.
    """
    engine = create_engine(find_dburi(dburi))
    formulas = pd.read_csv(formula_file)
    tsh = timeseries(namespace)
    with engine.begin() as cn:
        for entry in formulas.itertuples():
            fname = entry.name
            print('ingesting', fname)
            tsh.register_formula(cn, fname, entry.text, strict)
Esempio n. 16
0
def typecheck_formula(db_uri, pdbshell=False, namespace='tsh'):
    """parse and typecheck every series listed with the `formula` kind

    The name and the first element of the parsed formula are printed
    before each check.

    db_uri: database uri, resolved through `find_dburi`
    pdbshell: not used by this function
    namespace: namespace handed to `timeseries`
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)

    interp = Interpreter(engine, tsh, {})
    formula_names = (
        sname for sname, kind in tsh.list_series(engine).items()
        if kind == 'formula'
    )
    for sname in formula_names:
        parsed = parse(tsh.formula(engine, sname))
        print(sname, f'`{parsed[0]}`')
        typecheck(parsed, env=interp.env)
Esempio n. 17
0
def list_mismatch(db_uri, namespace='tsh'):
    """print the series listed by upstream but not by the main handler

    db_uri: database uri, resolved through `find_dburi`
    namespace: namespace handed to `timeseries`
    """
    e = create_engine(find_dburi(db_uri))

    tsh = timeseries(namespace)
    known = set(tsh.list_series(e))
    orphans = set(tsh.upstream.list_series(e)) - known
    if len(orphans) == 0:
        print('no mismatch')
        return

    print(f'found {len(orphans)} series in upstream')
    for name in sorted(orphans):
        # sanity check: the listings and `exists` must tell the same story
        assert (tsh.exists(e, name),
                tsh.upstream.exists(e, name)) == (False, True)
        print(name)
Esempio n. 18
0
def delete(db_uri, series=None, deletefile=None, namespace='tsh'):
    """delete series by providing a one-column file (csv format)

    file header must be `name`

    A single series name can be given instead of a file. When both are
    given, the file is used and the single name is ignored. When
    neither is given, a message is printed and nothing is deleted.
    """
    if not series and not deletefile:
        print('You must provide a series name _or_ a csv file path')
        return

    if deletefile:
        names = [row.name for row in pd.read_csv(deletefile).itertuples()]
    else:
        names = [series]

    engine = create_engine(find_dburi(db_uri))
    delete_series(engine, names, namespace)
Esempio n. 19
0
def update_metadata(dburi, reset=False, namespace='tsh'):
    """recompute the metadata of the formula series

    For every series listed with the `formula` kind, the formula is
    parsed, the series it refers to are looked up and the output of
    `tsh.filter_metadata` is stored with `tsh.update_metadata`.
    Formulas for which `filter_metadata` raises `ValueError` are
    collected as failures; those yielding no metadata, or metadata
    without an `index_dtype` entry, are collected as "todo". Both
    lists are printed at the end.

    dburi: database uri, resolved through `find_dburi`
    reset: first strip the keys listed in `tsh.metakeys` from the
           stored metadata of every formula
    namespace: schema holding the `formula` table, also handed to
               `timeseries`
    """
    engine = create_engine(find_dburi(dburi))
    tsh = timeseries(namespace)

    def formula_names():
        # lazily walk the listing, keeping the formulas only
        for sname, kind in tsh.list_series(engine).items():
            if kind == 'formula':
                yield sname

    if reset:
        reset_sql = (f'update "{namespace}".formula '
                     'set metadata = %(metadata)s '
                     'where name = %(name)s')
        for name in formula_names():
            # reset
            stored = tsh.metadata(engine, name)
            kept = {
                k: v for k, v in (stored or {}).items()
                if k not in tsh.metakeys
            }
            print('reset', name, 'to', kept)
            with engine.begin() as cn:
                cn.execute(reset_sql, metadata=json.dumps(kept), name=name)

    todo, errors = [], []

    for name in formula_names():
        print(name)

        tree = parse(tsh.formula(engine, name))
        smap = tsh.find_series(engine, tree)
        try:
            meta = tsh.filter_metadata(smap)
        except ValueError as err:
            errors.append((name, err))
            continue

        if meta and 'index_dtype' in meta:
            tsh.update_metadata(engine, name, meta)
        else:
            todo.append(name)
            print(' -> todo')

    print('TODO', todo)
    print('FAIL', errors)
Esempio n. 20
0
def log(db_uri,
        limit,
        series,
        from_insertion_date=None,
        to_insertion_date=None,
        namespace='tsh'):
    """show revision history of entire repository or series

    `limit` and the two insertion date bounds are forwarded to
    `tsh.log` (as `limit`, `fromdate` and `todate`); each revision is
    printed through `format_rev`, followed by an empty line.
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)

    revisions = tsh.log(
        engine,
        series,
        limit=limit,
        fromdate=from_insertion_date,
        todate=to_insertion_date,
    )
    for rev in revisions:
        print(format_rev(rev), end='\n\n')
        print()
Esempio n. 21
0
def drop_alias_tables(db_uri, drop=False, namespace='tsh'):
    """rewrite the formulas using outliers, then drop the alias tables

    The formulas referring to a series present in the `outliers` table
    are rewritten with `rewrite` and registered again (the before and
    after forms are printed). The `arithmetic`, `priority` and
    `outliers` tables are dropped only if `drop` is set.

    db_uri: database uri, resolved through `find_dburi`
    drop: really drop the tables once the formulas are rewritten
    namespace: schema holding the tables, also handed to `timeseries`
    """
    engine = create_engine(find_dburi(db_uri))

    # convert outliers to clip operator

    # the outliers are read from the requested namespace, i.e. the very
    # schema whose tables are dropped at the end
    elts = {
        serie: (lower, upper)
        for serie, lower, upper in engine.execute(
            f'select serie, min, max from "{namespace}".outliers'
        ).fetchall()
    }
    tsh = timeseries(namespace)
    rewriteme = []
    for name, kind in tsh.list_series(engine).items():
        if kind != 'formula':
            continue
        tree = parse(tsh.formula(engine, name))
        smap = tsh.find_series(engine, tree)
        # one series with outliers is enough to select the formula
        if any(sname in elts for sname in smap):
            rewriteme.append((name, tree))

    for name, tree in rewriteme:
        tree2 = rewrite(tree, elts)
        print(name)
        print(serialize(tree))
        print('->')
        print(serialize(tree2))
        print()
        tsh.register_formula(engine, name, serialize(tree2), update=True)

    if not drop:
        print('DID NOT DROP the tables')
        print('pass --drop to really drop them')
        return

    with engine.begin() as cn:
        cn.execute(f'drop table if exists "{namespace}".arithmetic')
        cn.execute(f'drop table if exists "{namespace}".priority')
        cn.execute(f'drop table if exists "{namespace}".outliers')
Esempio n. 22
0
def fix_slice(db_uri, really=False, namespace='tsh'):
    """apply `rewrite_slice` to every formula

    Each formula whose serialized form changes is reported with its
    before and after forms. The new form is registered only when
    `really` is set; otherwise a reminder is printed at the end.

    db_uri: database uri, resolved through `find_dburi`
    really: register the rewritten formulas
    namespace: namespace handed to `timeseries`
    """
    e = create_engine(find_dburi(db_uri))

    tsh = timeseries(namespace)
    for name, kind in tsh.list_series(e).items():
        if kind != 'formula':
            continue

        # parse+serialize -> normalization step
        before = serialize(parse(tsh.formula(e, name)))
        after = serialize(rewrite_slice(parse(before)))
        if before == after:
            continue

        print('rewritten', name)
        print(' was', before)
        print(' ->', after)
        if really:
            tsh.register_formula(e, name, after, update=True)

    if not really:
        print('UNCHANGED. To apply changes, pass --really')
Esempio n. 23
0
def register_priorities(dburi, priority_file, override=False):
    """register priorities timeseries aliases

    dburi: database uri, resolved through `find_dburi`
    priority_file: forwarded to `db.register_priority`
    override: forwarded to `db.register_priority`
    """
    uri = find_dburi(dburi)
    engine = create_engine(uri)
    # the registration runs within a single `engine.begin()` block
    with engine.begin() as connection:
        db.register_priority(connection, priority_file, override)
Esempio n. 24
0
def register_arithmetic(dburi, arithmetic_file, override):
    """register arithmetic timeseries aliases

    dburi: database uri, resolved through `find_dburi`
    arithmetic_file: forwarded to `db.register_arithmetic`
    override: forwarded to `db.register_arithmetic`
    """
    uri = find_dburi(dburi)
    engine = create_engine(uri)
    # the registration runs within a single `engine.begin()` block
    with engine.begin() as connection:
        db.register_arithmetic(connection, arithmetic_file, override)
Esempio n. 25
0
def register_outliers(dburi, outliers_file, override):
    """register outlier definitions

    dburi: database uri, resolved through `find_dburi`
    outliers_file: forwarded to `db.register_outliers`
    override: forwarded to `db.register_outliers`
    """
    uri = find_dburi(dburi)
    engine = create_engine(uri)
    # the registration runs within a single `engine.begin()` block
    with engine.begin() as connection:
        db.register_outliers(connection, outliers_file, override)
Esempio n. 26
0
def init_db(db_uri, namespace):
    """initialize the formula part of a timeseries history schema

    db_uri: database uri, resolved through `find_dburi`
    namespace: handed to `formula_schema`
    """
    dburi = find_dburi(db_uri)
    engine = create_engine(dburi)
    schema = formula_schema(namespace)
    schema.create(engine)
Esempio n. 27
0
def shell(db_uri, namespace='tsh'):
    """open a pdb session after building an engine and a series handler

    db_uri: database uri, resolved through `find_dburi`
    namespace: namespace handed to `timeseries`
    """
    # `e` and `tsh` are not used by the code below: presumably they are
    # bound so that they can be reached from the debugger prompt
    e = create_engine(find_dburi(db_uri))

    tsh = timeseries(namespace)
    import pdb
    pdb.set_trace()
Esempio n. 28
0
def init_db(db_uri, reset=False, namespace='tsh'):
    """initialize a new db.

    db_uri: database uri, resolved through `find_dburi`
    reset: not used by this function
    namespace: handed to `tshistory.schema.tsschema`
    """
    dburi = find_dburi(db_uri)
    engine = create_engine(dburi)
    tshistory.schema.tsschema(namespace).create(engine)