# Example 1
def test_new_db_functions_backwards_compatible():
    """batched_get_metric accepts both the old 5-arg and the new 6-arg call."""
    connection = _create_connection()
    cur = connection.cursor()

    date_list = [SUBMISSION_DATE_1.strftime(DATE_FMT)]
    non_keyed = json.dumps({'metric': 'GC_MAX_PAUSE_MS_2', 'child': 'true'})

    # Old signature: five parameters.
    cur.execute(
        'SELECT * FROM batched_get_metric(%s, %s, %s, %s, %s)',
        ('submission_date', 'nightly', '41', date_list, non_keyed),
    )

    # Just 1 result since this is 1 date and not a keyed histogram
    rows = cur.fetchall()
    assert len(rows) == 1, rows

    keyed = json.dumps({'metric': 'DEVTOOLS_PERFTOOLS_RECORDING_FEATURES_USED'})
    # New signature: six parameters, the extra one selecting a keyed histogram.
    cur.execute(
        'SELECT * FROM batched_get_metric(%s, %s, %s, %s, %s, %s)',
        ('submission_date', 'nightly', '41', date_list, non_keyed, keyed),
    )

    # 1 for the non-keyed histogram, 1 for the 1 key of the keyed histogram
    # Note we don't actually use batched_get_metric for multiple metrics,
    # but this behavior is expected
    assert len(cur.fetchall()) == 2
def test_trim_db_cli(conn):
    """End-to-end check of the trim_db CLI.

    A ``--dry-run`` invocation must exit 0 and leave the table set untouched;
    a ``--no-dry-run`` invocation must drop exactly the tables whose date falls
    outside the retention window (here SUBMISSION_DATE_2).
    """

    def run_trim(dry_run_flag):
        # Invoke the trim_db CLI with the shared connection options; only the
        # dry-run flag differs between the two phases of this test.
        return CliRunner().invoke(
            trim_db.main,
            [
                "--base-date",
                SUBMISSION_DATE_1.strftime(DATE_FMT),
                "--retention-period",
                1,
                dry_run_flag,
                "--postgres-db",
                "postgres",
                "--postgres-user",
                "postgres",
                "--postgres-pass",
                "pass",
                "--postgres-host",
                "db",
            ],
            catch_exceptions=False,
        )

    cursor = conn.cursor()
    list_tables = "select tablename from pg_catalog.pg_tables where schemaname='public'"
    cursor.execute(list_tables)
    before = {row[0] for row in cursor.fetchall()}

    # Dry run: succeeds without dropping anything.
    result = run_trim("--dry-run")
    assert result.exit_code == 0, result.output
    cursor.execute(list_tables)
    after = {row[0] for row in cursor.fetchall()}
    assert before == after

    # Real run: succeeds and actually drops tables.
    result = run_trim("--no-dry-run")
    assert result.exit_code == 0, result.output

    cursor.execute(list_tables)
    after = {row[0] for row in cursor.fetchall()}

    assert before != after
    # Every dropped table must belong to the out-of-retention date.
    assert all(SUBMISSION_DATE_2.strftime(DATE_FMT) in x for x in before - after), (
        before - after
    )
# Example 3
def test_aggregation_cli(tmp_path, mock_dataset):
    """Run the aggregator CLI with file-protocol credentials and verify output."""
    creds_path = str(tmp_path / "creds")
    # generally points to the production credentials
    with open(creds_path, "w") as f:
        json.dump({"DB_TEST_URL": "dbname=postgres user=postgres host=db"}, f)

    cli_args = [
        "--date",
        SUBMISSION_DATE_1.strftime('%Y%m%d'),
        "--channels",
        "nightly,beta",
        "--credentials-protocol",
        "file",
        "--credentials-bucket",
        "/",
        "--credentials-prefix",
        creds_path,
        "--num-partitions",
        10,
    ]
    result = CliRunner().invoke(run_aggregator, cli_args, catch_exceptions=False)

    assert result.exit_code == 0, result.output
    assert_new_db_functions_backwards_compatible()
# Example 4
def test_aggregation_cli_credentials_option(mock_dataset):
    """Postgres connection options on the command line override empty env vars.

    First invocation passes the full option set and must succeed; the second
    omits only ``--postgres-ro-host`` and must exit non-zero.
    """
    empty_env = {
        "DB_TEST_URL": "",
        "POSTGRES_DB": "",
        "POSTGRES_USER": "",
        "POSTGRES_PASS": "",
        "POSTGRES_HOST": "",
        "POSTGRES_RO_HOST": ","
    }
    options = [
        "--postgres-db", "postgres", "--postgres-user", "postgres",
        "--postgres-pass", "pass", "--postgres-host", "db",
        "--postgres-ro-host", "db"
    ]
    result = CliRunner().invoke(
        run_aggregator,
        [
            "--date",
            SUBMISSION_DATE_1.strftime('%Y%m%d'),
            "--channels",
            "nightly,beta",
            "--num-partitions",
            10,
        ] + options,
        env=empty_env,
        catch_exceptions=False,
    )

    assert result.exit_code == 0, result.output
    assert_new_db_functions_backwards_compatible()

    # now test that missing an option will exit with non-zero
    result = CliRunner().invoke(
        run_aggregator,
        [
            "--date",
            SUBMISSION_DATE_1.strftime('%Y%m%d'),
            "--channels",
            "nightly,beta",
            "--num-partitions",
            10,
        # options[:-2] drops only the trailing "--postgres-ro-host db" pair;
        # the previous options[:2] kept only "--postgres-db postgres", which
        # did not match the stated intent of testing a missing ro_host.
        ] + options[:-2],  # missing ro_host
        env=empty_env,
        catch_exceptions=False,
    )
    assert result.exit_code == 1
def test_query_submission_date(conn):
    """query_submission_date partitions tables into retained vs trimmed sets."""
    cur = conn.cursor()
    dates = [d.strftime(DATE_FMT) for d in (SUBMISSION_DATE_1, SUBMISSION_DATE_2)]

    # Retaining every known date trims nothing.
    retain, trim = trim_db.query_submission_date(cur, set(dates))
    assert len(trim) == 0
    assert len(retain) > 2  # each tablename includes the dimensions
    assert {trim_db.extract_ds_nodash(t) for t in retain} == set(dates)
    assert all("submission_date" in t for t in retain)

    # Retaining only the first date flags the second date's tables for trimming.
    retain, trim = trim_db.query_submission_date(cur, set(dates[:1]))
    assert {trim_db.extract_ds_nodash(t) for t in retain} == set(dates[:1])
    assert {trim_db.extract_ds_nodash(t) for t in trim} == set(dates[1:])
# Example 6
def test_new_db_functions_backwards_compatible():
    """The 5-arg batched_get_metric keeps working alongside the 6-arg form."""
    db = _create_connection()
    cur = db.cursor()

    base_params = (
        'submission_date',
        'nightly',
        '41',
        [SUBMISSION_DATE_1.strftime(DATE_FMT)],
        json.dumps({'metric': 'GC_MAX_PAUSE_MS_2', 'child': 'true'}),
    )
    cur.execute('SELECT * FROM batched_get_metric(%s, %s, %s, %s, %s)', base_params)

    # Just 1 result since this is 1 date and not a keyed histogram
    result = cur.fetchall()
    assert len(result) == 1, result

    keyed_spec = json.dumps({'metric': 'DEVTOOLS_PERFTOOLS_RECORDING_FEATURES_USED'})
    cur.execute(
        'SELECT * FROM batched_get_metric(%s, %s, %s, %s, %s, %s)',
        base_params + (keyed_spec,),
    )

    # 1 for the non-keyed histogram, 1 for the 1 key of the keyed histogram
    # Note we don't actually use batched_get_metric for multiple metrics,
    # but this behavior is expected
    assert len(cur.fetchall()) == 2
def test_trim_tables(conn):
    """trim_tables drops exactly the tables flagged for trimming."""
    cur = conn.cursor()
    list_tables = "select tablename from pg_catalog.pg_tables where schemaname='public'"
    cur.execute(list_tables)
    all_tables = {r[0] for r in cur.fetchall()}

    # Keep only the first date; everything else should be flagged for trimming.
    keep = {SUBMISSION_DATE_1.strftime(DATE_FMT)}
    _, trim = trim_db.query_submission_date(cur, keep)

    trim_db.trim_tables(conn, trim)
    conn.commit()

    cur.execute(list_tables)
    remaining = {r[0] for r in cur.fetchall()}
    assert all_tables - trim == remaining
# Example 8
def test_aggregation_cli_no_credentials_file(mock_dataset):
    """With no credentials file (empty DB_TEST_URL), the aggregator falls back
    to the POSTGRES_* environment variables and still succeeds."""
    result = CliRunner().invoke(
        run_aggregator,
        [
            "--date",
            SUBMISSION_DATE_1.strftime('%Y%m%d'),
            "--channels",
            "nightly,beta",
            "--num-partitions",
            10,
        ],
        env={
            "DB_TEST_URL": "",
            "POSTGRES_DB": "postgres",
            # NOTE(review): these two values were redacted to "******" in the
            # copied source; restored to the credentials every other test in
            # this file uses against the same "db" host — confirm.
            "POSTGRES_USER": "postgres",
            "POSTGRES_PASS": "pass",
            "POSTGRES_HOST": "db",
            "POSTGRES_RO_HOST": "db",
        },
        catch_exceptions=False,
    )

    assert result.exit_code == 0, result.output
    assert_new_db_functions_backwards_compatible()