def alert_on_z_score(df, check, alert_type, checked_txt, conf):
    """Create an Alert when the newest check result is a z-score outlier.

    Drops null results, computes the z-score of the latest value against
    the series, and persists a severity-2 Alert if it exceeds
    settings.ACCEPTABLE_Z_SCORE_DIFF in either direction.
    """
    df = df[df["result"].notnull()]
    # A single data point has no meaningful z-score.
    if len(df) <= 1:
        return

    z_scores = stats.zscore(df["result"])
    last_el_zscore = z_scores[-1]
    last_el = df["result"].iloc[-1]

    # zscore yields NaN for zero-variance series; nothing to alert on.
    if math.isnan(last_el_zscore):
        return
    if abs(last_el_zscore) <= settings.ACCEPTABLE_Z_SCORE_DIFF:
        return

    alert_desc = "above" if last_el_zscore > 0 else "below"
    alert = Alert(
        text=f""" {checked_txt}, {alert_desc} expected range, value: {last_el}, z_score: {last_el_zscore:.2f} """,
        severity=2,
        table_id=check.table_id,
        alert_type=alert_type,
        created_at=conf.for_time,
    )
    metrics_session.add(alert)
    metrics_session.commit()
def add_run():
    """Insert a fresh scheduled Run row in the 'not started' state."""
    new_run = Run(
        for_date=datetime.utcnow(),
        status='not started',
        run_type='scheduled',
    )
    metrics_session.add(new_run)
    metrics_session.commit()
def create_column_checks(db, table):
    """Create a single 'column_values' Check covering supported columns of *table*.

    Numeric columns are assigned Metric.FOR_NUMERICAL_COL, text columns
    Metric.FOR_TEXT_COL.  Columns named in settings.SKIP_COLUMNS or of an
    unsupported type are skipped.  The resulting Check is committed to the
    metrics DB.
    """
    metrics = {}
    # Hoist the type lookups out of the loop instead of calling
    # db.numeric_types()/db.character_types() repeatedly per column.
    numeric_types = db.numeric_types()
    character_types = db.character_types()
    supported_types = numeric_types + character_types

    for col in table.schema["columns"]:
        if col["name"] in settings.SKIP_COLUMNS:
            continue
        if col["type"] not in supported_types:
            continue
        # list(...) instead of a copying comprehension (ruff PERF402).
        if col["type"] in numeric_types:
            checks_for_col = list(Metric.FOR_NUMERICAL_COL)
        else:
            checks_for_col = list(Metric.FOR_TEXT_COL)
        metrics[col["name"]] = checks_for_col

    check = Check(
        table_id=table.id,
        name="column_values",
        metrics=metrics,
        query={
            "type": "standard",
            # plain string: the original f-string had no placeholders
            "path": "redata.checks.data_values.check_column_values",
            "params": {
                "time_interval": "1 day"
            },
        },
    )
    metrics_session.add(check)
    metrics_session.commit()
def alert_for_schema_change(db, check, conf):
    """Create an Alert for each schema change recorded in the last day.

    The initial 'table detected' event is not a real change and is skipped.
    """
    df = get_last_results(db, check, Metric.TABLE_METRIC, Metric.SCHEMA_CHANGE, conf, days=1)
    for _, row in df.iterrows():
        changes = json.loads(row[0])
        # First sighting of the table is not a schema change.
        if changes["operation"] == "table detected":
            continue
        metrics_session.add(Alert(
            text=f""" schema change detected - {changes['operation']}: {changes['column_name']} """,
            severity=2,
            table_id=check.table_id,
            alert_type=check.name,
            created_at=conf.for_time,
        ))
    metrics_session.commit()
def create_column_checks(db, table):
    """Create a single 'column_values' Check covering supported columns of *table*.

    Numeric columns are assigned Metric.FOR_NUMERICAL_COL, text columns
    Metric.FOR_TEXT_COL.  Columns named in settings.SKIP_COLUMNS or of an
    unsupported type are skipped.  The resulting Check is committed to the
    metrics DB.
    """
    metrics = {}
    # Hoist the type lookups out of the loop instead of calling
    # db.numeric_types()/db.character_types() repeatedly per column.
    numeric_types = db.numeric_types()
    character_types = db.character_types()
    supported_types = numeric_types + character_types

    for col in table.schema['columns']:
        if col['name'] in settings.SKIP_COLUMNS:
            continue
        if col['type'] not in supported_types:
            continue
        # list(...) instead of a copying comprehension (ruff PERF402).
        if col['type'] in numeric_types:
            checks_for_col = list(Metric.FOR_NUMERICAL_COL)
        else:
            checks_for_col = list(Metric.FOR_TEXT_COL)
        metrics[col['name']] = checks_for_col

    check = Check(
        table_id=table.id,
        name='column_values',
        metrics=metrics,
        query={
            'type': 'standard',
            # plain string: the original f-string had no placeholders
            'path': 'redata.checks.data_values.check_column_values',
            'params': {
                'time_interval': '1 day'
            }
        })
    metrics_session.add(check)
    metrics_session.commit()
def alert_on_z_score(df, table, check_col, alert_type, checked_txt):
    """Add an Alert for *table* when the newest value of *check_col* is a z-score outlier."""
    df = df[df[check_col].notnull()]
    # Need at least two points for a meaningful z-score.
    if len(df) <= 1:
        return

    scores = stats.zscore(df[check_col])
    latest_score = scores[-1]
    latest_value = df[check_col].iloc[-1]

    # Zero-variance series yield NaN; nothing to alert on.
    if math.isnan(latest_score):
        return
    if abs(latest_score) <= settings.ACCEPTABLE_Z_SCORE_DIFF:
        return

    alert_desc = 'above' if latest_score > 0 else 'below'
    print(f"Adding alert about table {table.table_name}")
    metrics_session.add(Alert(
        text=f""" {checked_txt}, {alert_desc} expected range, value: {latest_value}, z_score: {latest_score:.2f} """,
        severity=2,
        table_id=table.id,
        alert_type=alert_type,
    ))
    metrics_session.commit()
def insert_schema_changed_record(table, operation, column_name, column_type, column_count):
    """Persist one MetricsSchemaChanges row describing a detected schema change."""
    record = MetricsSchemaChanges(
        table_id=table.id,
        operation=operation,
        column_name=column_name,
        column_type=column_type,
        column_count=column_count,
    )
    metrics_session.add(record)
    metrics_session.commit()
def check_data_volume(db, table, time_interval):
    """Run the data-volume check for *table* and persist the resulting row count."""
    volume = db.check_data_volume(table, time_interval)
    metrics_session.add(MetricsDataVolume(
        table_id=table.id,
        time_interval=time_interval,
        count=volume.count,
    ))
    metrics_session.commit()
def setup_for_source_table(cls, db, db_table_name):
    """Create and persist a MonitoredTable for *db_table_name*, picking a time column.

    Heuristic: among date/timestamp columns whose name does not match the
    configured blacklist regex, keep those whose max value is not in the
    future, order them newest-max first, and prefer names containing
    'creat' (created_at and friends).

    Returns the persisted MonitoredTable, or None when no usable time
    column was found (the table is skipped for now).
    """
    print (f"Running setup for {db_table_name}")
    valid_types = db.datetime_types()
    schema_cols = get_current_table_schema(db, db_table_name)
    table = MonitoredTable(
        table_name=db_table_name,
        schema={'columns': schema_cols},
        source_db=db.name
    )

    # heuristics to find best column to sort by when computing stats about data
    # TODO: could probably look up in a provided table of regex + score, with higher scored matches being preferred

    # list all date/timestamp columns, filtering out anything that's blacklisted in configuration
    blacklist_regex = settings.REDATA_TIME_COL_BLACKLIST_REGEX
    matching_cols = [
        col['name'] for col in schema_cols
        if col['type'] in valid_types
        and re.search(blacklist_regex, col['name']) is None
    ]

    # from matches, collect time cols that have max values at or before "now"
    cols_by_ts = defaultdict(list)
    # BUG FIX: was datetime.datetime.now() — the rest of this module uses the
    # datetime *class* (datetime.utcnow(), datetime.combine(...)), so the
    # double-qualified form would raise AttributeError.
    now_ts = datetime.now()
    for col in matching_cols:
        max_ts = db.get_max_timestamp(table, col)
        if max_ts <= now_ts:
            cols_by_ts[max_ts].append(col)

    # list of all viable candidates, ordered by latest timestamp first
    candidates = list(itertools.chain.from_iterable(
        cols for ts, cols in sorted(cols_by_ts.items(), reverse=True)
    ))

    # list of preferred columns out of the viable ones, by name filtering
    preferred = [col for col in candidates if 'creat' in col.lower()]

    if not candidates:
        # no columns found? ignore table..
        # TODO: add it, but set to disabled, for screening via web UI when we have one
        print (f"Not found column to sort by for {db_table_name}, skipping it for now")
        return None

    # if multiple columns found, primarily select from 'preferred' if exists, then set up the table
    col_name = preferred[0] if preferred else candidates[0]
    col_type = next(col['type'] for col in schema_cols if col['name'] == col_name)
    if len(candidates) > 1:
        print (f"Found multiple columns to sort by {candidates}, choosing {col_name}, please update in DB if needed")
    else:
        print (f"Found column to sort by {col_name}")

    table.time_column = col_name
    table.time_column_type = col_type
    metrics_session.add(table)
    metrics_session.commit()
    return table
def add_run():
    """Insert a fresh scheduled Scan row in the 'not started' state."""
    now = datetime.utcnow()
    scan = Scan(
        start_date=now,
        end_date=datetime.utcnow(),
        status="not started",
        run_type="scheduled",
    )
    metrics_session.add(scan)
    metrics_session.commit()
def check_data_delayed(db, table, conf):
    """Record how delayed *table*'s data is, if the DB reports any delay."""
    delay = db.check_data_delayed(table, conf)
    # delay is a 1-tuple; a falsy first element means no delay measured.
    if not delay[0]:
        return
    metrics_session.add(MetricsDataDelay(
        table_id=table.id,
        value=delay[0].total_seconds(),
        created_at=conf.for_time,
    ))
    metrics_session.commit()
def check_data_delayed(db, table):
    """Record how delayed *table*'s data is, if the DB reports any delay."""
    delay = db.check_data_delayed(table)
    # delay is a 1-tuple; a falsy first element means no delay measured.
    if not delay[0]:
        return
    metric = MetricsDataDelay(
        table_id=table.id,
        value=delay[0].total_seconds(),
    )
    metrics_session.add(metric)
    metrics_session.commit()
def check_generic(func_name, db, table, checked_column, time_interval):
    """Run the generic per-column check *func_name* and persist its value."""
    outcome = db.check_generic(func_name, table, checked_column, time_interval)
    metrics_session.add(MetricsDataValues(
        table_id=table.id,
        column_name=checked_column,
        check_name=f'check_{func_name}',
        check_value=outcome.value,
        time_interval=time_interval,
    ))
    metrics_session.commit()
def check_count_nulls(db, table, checked_column, time_interval):
    """Count NULLs in *checked_column* over *time_interval* and persist the result."""
    nulls = db.check_count_nulls(table, checked_column, time_interval)
    metrics_session.add(MetricsDataValues(
        table_id=table.id,
        column_name=checked_column,
        check_name='check_count_nulls',
        check_value=nulls.value,
        time_interval=time_interval,
    ))
    metrics_session.commit()
def check_count_per_value(db, table, checked_column, time_interval):
    """Persist a per-distinct-value row count for *checked_column* over *time_interval*."""
    rows = db.check_count_per_value(table, checked_column, time_interval)
    # The DB adapter may return None when the column has no data.
    for row in rows or []:
        metrics_session.add(MetricsDataValues(
            table_id=table.id,
            column_name=checked_column,
            column_value=row.value,
            check_name='check_count_per_value',
            check_value=row.count,
            time_interval=time_interval,
        ))
    metrics_session.commit()
def create_admin_user_if_not_exist(cls):
    """Ensure the admin account from REDATA_ADMIN_USER / REDATA_ADMIN_PASSWORD exists.

    Reads both env variables once, asserts they are set, and creates the
    user (with a hashed password) only when no row with that login exists.
    """
    admin_login = os.environ.get('REDATA_ADMIN_USER')
    admin_password = os.environ.get('REDATA_ADMIN_PASSWORD')
    assert admin_login, 'please set env variable for admin user'
    assert admin_password, 'please set env variable for admin password'

    already_exists = metrics_session.query(cls).filter(
        cls.login == admin_login
    ).count()
    if not already_exists:
        user = cls(
            login=admin_login,
            password=generate_password_hash(admin_password),
        )
        metrics_session.add(user)
        metrics_session.commit()
        print("Created admin user")
def add_metrics(cls, results, check, conf):
    """Persist one MetricFromCheck row per (column, metric) pair in *results*.

    For table-level metrics (col == Metric.TABLE_METRIC) the result row is
    selected by the bare metric name; column metrics use '<column>_<metric>'.
    """
    print(f"Adding results for check: {check}")
    for row in results:
        for col, metrics in check.metrics.items():
            # BUG FIX: the original rebound the loop variable `m` to the
            # MetricFromCheck instance inside the loop body; use a distinct
            # name so the metric name and the ORM record don't shadow each
            # other.
            for metric in metrics:
                select_name = col + '_' + metric if col != Metric.TABLE_METRIC else metric
                record = MetricFromCheck(
                    check_id=check.id,
                    table_id=check.table.id,
                    table_column=col or None,
                    params=check.query['params'],
                    metric=metric,
                    result={'value': row[select_name]},
                    created_at=conf.for_time,
                )
                metrics_session.add(record)
    metrics_session.commit()
def create_for_detected_table(db, table):
    """Register every standard table-level check for a newly detected table,
    then create its per-column checks."""
    for check in table_checks:
        func = check['func']
        metrics_session.add(Check(
            table_id=table.id,
            name=check['metric'],
            metrics={Metric.TABLE_METRIC: [check['metric']]},
            query={
                'type': 'standard',
                'path': f'redata.checks.{func}',
                'params': check['params']
            },
        ))
        metrics_session.commit()
    create_column_checks(db, table)
def add_metrics(cls, results, check, conf):
    """Persist one MetricFromCheck row per (column, metric) pair in *results*.

    The select name for each metric is resolved via name_for(col, metric);
    query params default to an empty dict when the check has none.
    """
    print(f"Adding results for check: {check}")
    for row in results:
        for col, metrics in check.metrics.items():
            # BUG FIX: the original rebound the loop variable `m` to the
            # MetricFromCheck instance inside the loop body; use a distinct
            # name so the metric name and the ORM record don't shadow each
            # other.
            for metric in metrics:
                select_name = name_for(col, metric)
                record = MetricFromCheck(
                    check_id=check.id,
                    table_id=check.table.id,
                    table_column=col,
                    params=check.query.get("params", {}),
                    metric=metric,
                    result={"value": row[select_name]},
                    created_at=conf.for_time,
                )
                metrics_session.add(record)
    metrics_session.commit()
def setup_for_source_table(cls, db, db_table_name):
    """Create and persist a MonitoredTable for *db_table_name*, picking a time column.

    Heuristic: take all columns with a date/timestamp-like type, prefer
    those whose name contains 'creat' (created_at and friends).

    Returns the persisted MonitoredTable, or None when no time-typed column
    exists (the table is skipped for now).
    """
    print (f"Running setup for {db_table_name}")
    # Preferred time-column types, as reported by the source DB.
    preference = [
        'timestamp without time zone',
        'timestamp with time zone',
        'date',
        'datetime'  # mysql
    ]
    schema_cols = get_current_table_schema(db, db_table_name)

    # heuristics to find best column to sort by when computing stats about data
    proper_type = [col['name'] for col in schema_cols if col['type'] in preference]
    # Prefer creation-style names ('creat' matches created_at, create_time, ...).
    # BUG FIX: dropped the dead, misspelled `colname, col_type = None, None`
    # initializer (later code uses `col_name`, which is always assigned below).
    columns = [c for c in proper_type if 'creat' in c]

    if len(proper_type) == 0:
        print (f"Not found column to sort by for {db_table_name}, skipping it for now")
        return None

    if len(columns) > 1:
        print (f"Found multiple columns to sort by {columns}, choosing {columns[0]}, please update in DB if needed")
    col_name = columns[0] if columns else proper_type[0]
    col_type = next(col['type'] for col in schema_cols if col['name'] == col_name)
    print (f"Found column to sort by {col_name}")

    table = MonitoredTable(
        table_name=db_table_name,
        time_column=col_name,
        time_column_type=col_type,
        schema={'columns': schema_cols},
        source_db=db.name
    )
    metrics_session.add(table)
    metrics_session.commit()
    return table
def create_for_detected_table(db, table):
    """Register every standard table-level check for a newly detected table,
    then create its per-column checks."""
    for check in table_checks:
        func = check["func"]
        metrics_session.add(Check(
            table_id=table.id,
            name=check["metric"],
            metrics={Metric.TABLE_METRIC: [check["metric"]]},
            query={
                "type": "standard",
                "path": f"redata.checks.{func}",
                "params": check["params"],
            },
        ))
        metrics_session.commit()
    create_column_checks(db, table)
def check_data_volume_diff(db, table):
    """Compute and store daily data-volume diffs for *table*.

    Resumes from the newest previously recorded diff; when none exists,
    starts from midnight today (the stat is shown daily).
    """
    last_recorded = metrics_db.execute(
        text("""
            SELECT max(created_at) as created_at
            FROM metrics_data_volume_diff
            WHERE table_id = :table_id
        """),
        {'table_id': table.id},
    ).first()

    from_time = last_recorded.created_at if last_recorded else None
    if from_time is None:
        # if no previous diff computed, compute from start of day
        # mostly because we show that stat daily
        from_time = datetime.combine(date.today(), time())

    result = db.check_data_volume_diff(table, from_time=from_time)
    for r in result or []:
        metrics_session.add(MetricsDataVolumeDiff(
            table_id=table.id,
            date=r.date,
            count=r.count,
        ))
    metrics_session.commit()