Example #1
def check_data_volume_diff(db, table):
    from_time = metrics_db.execute(
        text("""
        SELECT max(created_at) as created_at
        FROM metrics_data_volume_diff
        WHERE table_id = :table_id
        """), {
            'table_id': table.id
        }).first()
    from_time = from_time.created_at if from_time else None

    if from_time is None:
        # if no previous diff was computed, compute from the start of the day,
        # mostly because we show that stat daily
        from_time = datetime.combine(date.today(), time())

    try:
        result = db.check_data_volume(table, where_timecol=f">= '{from_time}'")
    except AttributeError:
        result = db.execute(
            text(f"""
            SELECT count(*) as count
            FROM {table.table_name}
            WHERE {table.time_column} >= :from_time
        """), {
                'from_time': from_time
            }).first()

    metrics_data_volume_diff = metadata.tables['metrics_data_volume_diff']

    stmt = metrics_data_volume_diff.insert().values(
        table_id=table.id,
        from_time=from_time,
        count=result.count)

    metrics_db.execute(stmt)
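
All of these snippets rely on module-level objects such as metrics_db, metadata, metrics_session, and SQLAlchemy's text. A minimal sketch of that shared setup, assuming a PostgreSQL metrics database and SQLAlchemy 1.x-style implicit execution; the environment variable name and fallback URL are assumptions:

# Hedged sketch of the shared module-level objects used by the examples;
# the env var name and fallback connection URL are assumptions.
import os
from datetime import date, datetime, time, timedelta

from sqlalchemy import MetaData, create_engine, text
from sqlalchemy.orm import sessionmaker

# Engine used as metrics_db throughout (SQLAlchemy 1.x implicit execution)
metrics_db = create_engine(
    os.environ.get("REDATA_METRICS_DB_URL",
                   "postgresql://redata:redata@localhost:5432/redata"))

# Reflect existing metrics tables so metadata.tables[...] lookups resolve
metadata = MetaData()
metadata.reflect(bind=metrics_db)

# ORM session used as metrics_session in Example #11
metrics_session = sessionmaker(bind=metrics_db)()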
Example #2
def insert_schema_changed_record(table, operation, column_name, column_type,
                                 column_count):
    schema_changes = metadata.tables['metrics_table_schema_changes']

    stmt = schema_changes.insert().values(
        table_id=table.id,
        operation=operation,
        column_name=column_name,
        column_type=column_type,
        column_count=column_count)
    metrics_db.execute(stmt)
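
A hedged usage sketch for the function above; the operation label and column details are illustrative assumptions, not taken from the source:

# Hedged usage sketch: record a detected schema change for a monitored table.
# The 'column added' label and the specific column values are assumptions.
insert_schema_changed_record(table,
                             operation='column added',
                             column_name='uid',
                             column_type='text',
                             column_count=5)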
Example #3
def create_sample_tables_in_redata():

    partners = [
        'team1', 'team2', 'team3', 'team4', 'team5', 'team6', 'team7', 'c1',
        'c2', 'c3', 'c4', 'c5', 'c6', 'c7'
    ]

    types = [
        'OPEN',
        'CLICK',
        'EXIT',
    ]

    metrics_db.execute("CREATE SCHEMA IF NOT EXISTS sample")
    for partner in partners:
        metrics_db.execute(EVENTS.substitute(partner=partner))
    metrics_db.execute(SESSION)
    metrics_db.execute(REPORT)

    num_events = random.randint(5000, 10000)
    utc_now = datetime.utcnow()

    secs_in_month = 3000000

    failure_range = [1200000, 1400000]

    higher_values_range = [200000, 600000]

    nulls_show_up = [800000, 1000000]

    for event in range(num_events):
        print("Creating event ", event)
        how_much_back = random.randint(0, secs_in_month)
        rand_value = random.random() * 100
        random_uid = 'uid_{}'.format(random.randint(0, 1000))

        if failure_range[0] < how_much_back < failure_range[1]:
            how_much_back = random.randint(0, secs_in_month)

        if higher_values_range[0] < how_much_back < higher_values_range[1]:
            rand_value = random.random() * 100 * 5 + 50

        if nulls_show_up[0] < how_much_back < nulls_show_up[1]:
            random_uid = None

        created_at = utc_now - timedelta(seconds=how_much_back)

        metrics_db.execute(
            insert_to_events.substitute(partner=random.choice(partners),
                                        event_type=random.choice(types),
                                        uid=random_uid,
                                        value=rand_value,
                                        created_at=created_at))
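
The EVENTS, SESSION, REPORT, and insert_to_events objects used above behave like string.Template instances. A rough sketch of what two of them could look like; the exact DDL and column list are assumptions, and the real templates presumably handle NULL uids and quoting more carefully:

# Hedged sketch of the string.Template objects the sample-data generator uses.
# Table layout and column names are assumptions consistent with the loop above.
from string import Template

EVENTS = Template("""
    CREATE TABLE IF NOT EXISTS sample.events_$partner (
        uid text,
        event_type text,
        value double precision,
        created_at timestamp
    )
""")

insert_to_events = Template("""
    INSERT INTO sample.events_$partner (uid, event_type, value, created_at)
    VALUES ('$uid', '$event_type', $value, '$created_at')
""")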
Example #4
def check_data_delayed(db, table):

    age_fun = db.get_age_function()

    result = db.execute(f"""
        SELECT 
            {age_fun}(now(), max({table.time_column}))
        FROM {table.table_name}
    """).fetchall()[0]

    metrics_data_delay = metadata.tables['metrics_data_delay']
    stmt = metrics_data_delay.insert().values(table_id=table.id,
                                              value=result[0].total_seconds())
    metrics_db.execute(stmt)
Example #5
def check_count_per_value(db, table, checked_column, time_interval):

    try:
        result = db.check_count_per_value(table, checked_column, time_interval)
    except AttributeError:
        sep = db.get_interval_sep()
        check_distinct = db.execute(f"""
            SELECT
                count(distinct({checked_column})) as count
            FROM {table.table_name}
            WHERE
                {table.time_column} > now() - INTERVAL {sep}{time_interval}{sep}
        """).first()

        if check_distinct.count > 10:
            # Skipping if more than 10 different values showing up in column
            return

        result = db.execute(f"""
            SELECT
                count(*) as count,
                {checked_column} as value
            FROM
                {table.table_name}
            WHERE
                {table.time_column} > now() - INTERVAL {sep}{time_interval}{sep} and
                {checked_column} is not null
            GROUP BY
                {checked_column}
            ORDER BY
                count DESC
            LIMIT 10
        """)

    metrics_data_values = metadata.tables['metrics_data_values']
    for row in result:
        stmt = metrics_data_values.insert().values(
            table_id=table.id,
            column_name=checked_column,
            column_value=row.value,
            check_name='check_count_per_value',
            check_value=row.count,
            time_interval=time_interval)

        metrics_db.execute(stmt)
Example #6
def check_generic(func_name, db, table, checked_column, time_interval):
    sep = db.get_interval_sep()
    result = db.execute(f"""
        SELECT
            {func_name}({checked_column}) as value
        FROM
            {table.table_name}
        WHERE {table.time_column} > now() - INTERVAL {sep}{time_interval}{sep}
    """).first()

    metrics_data_values = metadata.tables['metrics_data_values']
    stmt = metrics_data_values.insert().values(table_id=table.id,
                                               column_name=checked_column,
                                               check_name=f'check_{func_name}',
                                               check_value=result.value,
                                               time_interval=time_interval)

    metrics_db.execute(stmt)
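
Because check_generic takes the aggregate function name as a parameter, one column can be covered by several metrics in a single loop. A hedged usage sketch; the column name and interval are assumptions:

# Hedged usage sketch: run a few generic aggregate checks for one column.
# 'avg', 'min', and 'max' map directly to SQL aggregate functions.
for func_name in ('avg', 'min', 'max'):
    check_generic(func_name, db, table,
                  checked_column='value', time_interval='1 day')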
Example #7
def check_data_volume(db, table, time_interval):

    sep = db.get_interval_sep()
    result = db.execute(f"""
        SELECT
            count(*) as count
        FROM {table.table_name}
        WHERE {table.time_column} > now() - INTERVAL {sep}{time_interval}{sep}
    """).first()

    metrics_data_volume = metadata.tables['metrics_data_volume']

    stmt = metrics_data_volume.insert().values(
        table_id=table.id,
        time_interval=time_interval,
        count=result.count
    )
    
    metrics_db.execute(stmt)
Example #8
def check_count_nulls(db, table, checked_column, time_interval):

    sep = db.get_interval_sep()
    result = db.execute(f"""
        SELECT
            count(*) as value
        FROM
            {table.table_name}
        WHERE
            {table.time_column} > now() - INTERVAL {sep}{time_interval}{sep} and
            {checked_column} is null
    """).first()

    metrics_data_values = metadata.tables['metrics_data_values']
    stmt = metrics_data_values.insert().values(table_id=table.id,
                                               column_name=checked_column,
                                               check_name='check_count_nulls',
                                               check_value=result.value,
                                               time_interval=time_interval)

    metrics_db.execute(stmt)
Example #9
def check_data_volume(db, table, time_interval):

    try:
        interval_part = db.make_interval(time_interval)
        result = db.check_data_volume(
            table, where_timecol=f"> now() - {interval_part}")
    except AttributeError:
        sep = db.get_interval_sep()
        result = db.execute(f"""
            SELECT
                count(*) as count
            FROM {table.table_name}
            WHERE {table.time_column} > now() - INTERVAL {sep}{time_interval}{sep}
        """).first()

    metrics_data_volume = metadata.tables['metrics_data_volume']

    stmt = metrics_data_volume.insert().values(table_id=table.id,
                                               time_interval=time_interval,
                                               count=result.count)

    metrics_db.execute(stmt)
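
The try/except AttributeError pattern in Examples #1 and #9 lets a backend-specific wrapper override a check and falls back to generic SQL when the wrapper does not implement it. A minimal sketch of such a wrapper for PostgreSQL; the class name and constructor are assumptions, and only the hooks called in these examples are shown:

# Hedged sketch of a source-database wrapper (the db argument above).
# A backend that omits check_data_volume triggers the AttributeError fallback.
class PostgresSource:
    def __init__(self, engine, name):
        self.engine = engine
        self.name = name

    def execute(self, sql):
        return self.engine.execute(sql)

    def get_interval_sep(self):
        # Postgres interval literals are quoted: INTERVAL '1 day'
        return "'"

    def get_age_function(self):
        # age(now(), max(col)) returns an interval in Postgres
        return 'age'

    def make_interval(self, time_interval):
        return f"INTERVAL '{time_interval}'"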
Example #10
def setup_metrics():
    metrics_db.execute("""CREATE TABLE IF NOT EXISTS metrics_data_delay (
        table_id integer,
        value integer,
        created_at timestamp default now()
        );""")
    metrics_db.execute(
        """CREATE TABLE IF NOT EXISTS metrics_table_schema_changes (
        table_id integer,
        column_name text,
        column_type text,
        column_count integer,
        operation text,
        created_at timestamp default now()
        )""")
    metrics_db.execute("""CREATE TABLE IF NOT EXISTS metrics_data_volume (
        table_id integer,
        time_interval text,
        count bigint,
        created_at timestamp default now()
        );
        """)
    metrics_db.execute("""CREATE TABLE IF NOT EXISTS metrics_data_volume_diff (
        table_id integer,
        from_time timestamp,
        count bigint,
        created_at timestamp default now()
        )""")
    metrics_db.execute("""CREATE TABLE IF NOT EXISTS metrics_data_values (
        table_id integer,
        column_name text,
        column_value text,
        check_name text,
        check_value double precision,
        time_interval text,
        created_at timestamp default now()
        )""")
Example #11
def check_data_volume_diff(db, table):
    from_time = metrics_db.execute(
        text("""
        SELECT max(created_at) as created_at
        FROM metrics_data_volume_diff
        WHERE table_id = :table_id
        """), {
            'table_id': table.id
        }).first()
    from_time = from_time.created_at if from_time else None

    if from_time is None:
        # if no previous diff was computed, compute from the start of the day,
        # mostly because we show that stat daily
        from_time = datetime.combine(date.today(), time())

    result = db.check_data_volume_diff(table, from_time=from_time)

    for r in (result or []):
        metric = MetricsDataVolumeDiff(table_id=table.id,
                                       date=r.date,
                                       count=r.count)
        metrics_session.add(metric)
    metrics_session.commit()
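
Example #11 writes through an ORM model, MetricsDataVolumeDiff, rather than a reflected Table. A hedged sketch of what that declarative model could look like; the surrogate primary key and column types are assumptions, and the date column suggests a newer schema than the DDL shown in Example #10:

# Hedged sketch of the ORM model used above; column types and the surrogate
# primary key are assumptions, not taken from the source.
from sqlalchemy import BigInteger, Column, Date, DateTime, Integer, func
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class MetricsDataVolumeDiff(Base):
    __tablename__ = 'metrics_data_volume_diff'

    id = Column(Integer, primary_key=True)
    table_id = Column(Integer)
    date = Column(Date)
    count = Column(BigInteger)
    created_at = Column(DateTime, server_default=func.now())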
Example #12
def get_dashboard_for_table(db, table):
    table_data = load_json_data(settings.TABLE_DASHBOARD_LOCATION)

    panels = table_data['panels']
    per_title = {panel.title(): panel for panel in ALL_PANELS}

    table_data['title'] = f"source: {db.name} table: {table.table_name} (generated)"

    for panel in panels:
        if per_title.get(panel['title']):
            panel = update_panel_element(table, panel,
                                         per_title[panel['title']])

    all_checks = metrics_db.execute(f"""
        SELECT DISTINCT column_name, check_name
        FROM metrics_data_values
        WHERE table_id = '{table.id}'
        """)

    next_id = 20
    y_pos = 20
    x_pos = 0

    check_per_column = {}
    for column_name, check_name in all_checks:
        if column_name not in check_per_column:
            check_per_column[column_name] = [check_name]
        else:
            check_per_column[column_name].append(check_name)

    for column_name in sorted(check_per_column.keys()):
        if x_pos != 0:
            x_pos = 0
            y_pos += 7

        panel = load_json_data(settings.CUSTOM_ROW_LOCATION)
        panel['title'] = f"{column_name} column"
        panel['id'] = next_id
        panel['gridPos']["y"] = y_pos
        panel['gridPos']["x"] = x_pos

        next_id += 1
        y_pos += 1

        panels.append(panel)

        checks = check_per_column[column_name]

        for check_name in checks:
            panel = load_json_data(settings.CUSTOM_PANEL_LOCATION)
            panel['title'] = f"{check_name}"
            panel['id'] = next_id
            panel['gridPos']["y"] = y_pos
            panel['gridPos']["x"] = x_pos

            next_id += 1
            if x_pos == 0:
                x_pos += 12
            else:
                x_pos = 0
                y_pos += 7

            if check_name != 'check_count_per_value':
                panel = update_panel_element(table,
                                             panel,
                                             CheckForColumn,
                                             column_name=column_name,
                                             check_name=check_name)
            else:
                panel = update_panel_element(table,
                                             panel,
                                             CheckForColumnByValue,
                                             column_name=column_name,
                                             check_name=check_name,
                                             time_interval='1 day')
            panels.append(panel)

    return table_data
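
get_dashboard_for_table returns a Grafana-style dashboard dict assembled from JSON templates. A hedged usage sketch for writing it out; the output path is an assumption:

# Hedged usage sketch: dump the generated dashboard so it can be provisioned
# into Grafana. The dashboards/ output path is an assumption.
import json

dashboard = get_dashboard_for_table(db, table)
with open(f"dashboards/table_{table.id}.json", "w") as f:
    json.dump(dashboard, f, indent=2)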