コード例 #1
0
ファイル: test_logger_image.py プロジェクト: whylabs/whylogs
def test_log_pil_image(tmpdir, image_files):
    """Log PIL images through a session logger and check the profile columns.

    Each path in ``image_files`` is opened with PIL and handed to
    ``logger.log_image``; the resulting profile must contain every name in
    ``_EXPECTED_COLUMNS``.
    """
    output_path = tmpdir.mkdir("whylogs")
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project",
                                   "pipeline",
                                   writers=[writer_config])

    session = session_from_config(session_config)

    with session.logger("image_pil_test",
                        with_rotation_time="1m",
                        cache_size=1) as logger:

        for image_file_path in image_files:
            # Image.open keeps the file handle open; the context manager
            # releases it as soon as the image has been logged.
            with Image.open(image_file_path) as img:
                logger.log_image(img)

        profile = logger.profile
        columns = profile.columns
        for column_name in _EXPECTED_COLUMNS:
            assert column_name in columns, f"{column_name} not found in {columns}"
    shutil.rmtree(output_path, ignore_errors=True)
コード例 #2
0
def test_log_metrics_with_boolean_labels(tmpdir):
    """Log boolean targets and predictions; expect a two-label confusion matrix."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    actual = [True, False, True]
    predicted = [False, True, False]
    confidences = [0.2, 0.5, 0.6]
    with session.logger("metrics_test") as logger:
        logger.log_metrics(actual, predicted, confidences)

        model_profile = logger.profile.model_profile
        assert model_profile is not None

        labels = model_profile.metrics.confusion_matrix.labels
        assert len(labels) == 2
    shutil.rmtree(out_dir, ignore_errors=True)
コード例 #3
0
def test_log_metrics(tmpdir):
    """Log string class labels; expect one confusion-matrix label per class."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    actual = ["class_name1", "class_name2", "class_name3"]
    predicted = ["class_name1", "class_name2", "class_name2"]
    confidences = [0.2, 0.5, 0.6]
    expected_label_count = 3

    with session.logger("metrics_test") as logger:
        logger.log_metrics(actual, predicted, confidences)

        model_profile = logger.profile.model_profile
        assert model_profile is not None

        labels = model_profile.metrics.confusion_matrix.labels
        assert len(labels) == expected_label_count
    shutil.rmtree(out_dir)
コード例 #4
0
def test_segments_with_rotation(df_lending_club, tmpdir):
    """Log segmented data with per-second rotation and count the output files.

    The lending-club frame is logged twice with the frozen clock advanced by
    one second after each call. A generated test dataframe is then expected
    to raise ``KeyError`` when logged. Eight files must exist afterwards.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    config = SessionConfig("project", "pipeline", writers=[local_writer])
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as clock:
        session = session_from_config(config)
        with session.logger(
                "test",
                with_rotation_time='s',
                segments=["home_ownership"],
                profile_full_dataset=True,
                cache=1,
        ) as logger:
            for _ in range(2):
                logger.log_dataframe(df_lending_club)
                clock.tick(delta=datetime.timedelta(seconds=1))

            # Build the frame outside pytest.raises so only the logging call
            # is covered by the KeyError expectation.
            generated = util.testing.makeDataFrame()
            with pytest.raises(KeyError):
                logger.log_dataframe(generated)

    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 8
    shutil.rmtree(out_dir)
コード例 #5
0
def test_log_rotation_days(tmpdir):
    """Log with daily rotation across a frozen clock and expect three files.

    One frame is logged, the clock advances one day, two frames are logged,
    the clock advances two more days, and a final frame is logged.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    config = SessionConfig("project", "pipeline", writers=[local_writer])
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as clock:
        session = session_from_config(config)
        with session.logger("test", with_rotation_time='d',
                            cache_size=1) as logger:
            logger.log_dataframe(util.testing.makeDataFrame())
            clock.tick(delta=datetime.timedelta(days=1))
            logger.log_dataframe(util.testing.makeDataFrame())
            logger.log_dataframe(util.testing.makeDataFrame())
            clock.tick(delta=datetime.timedelta(days=2))
            logger.log_dataframe(util.testing.makeDataFrame())

    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 3
    shutil.rmtree(out_dir)
コード例 #6
0
ファイル: test_logger.py プロジェクト: sachuin23/whylogs
def test_log_multiple_calls(tmpdir, df_lending_club):
    """Log the same dataframe in five loggers with different dataset timestamps.

    The test runs with ``script_dir`` as the working directory and restores
    the previous working directory afterwards, even if an assertion fails.
    """
    # os.curdir is only the literal "." and cannot be used to get back to the
    # previous directory; record the real absolute path instead.
    original_dir = os.getcwd()
    os.chdir(script_dir)
    try:
        p = tmpdir.mkdir("whylogs")

        writer_config = WriterConfig(
            "local", ["protobuf", "flat"], p.realpath(),
            filename_template="dataset_summary-$dataset_timestamp")
        yaml_data = writer_config.to_yaml()
        # Round-trip the writer config through YAML; the result is unused.
        WriterConfig.from_yaml(yaml_data)

        session_config = SessionConfig(
            "project", "pipeline", writers=[writer_config])
        session = session_from_config(session_config)

        now = datetime.datetime.now()
        for i in range(0, 5):
            with session.logger(dataset_timestamp=now + datetime.timedelta(days=i)) as logger:
                logger.log_dataframe(df_lending_club)

        output_files = []
        for _root, _subdirs, files in os.walk(p):
            output_files += files
        # Five logger runs are expected to leave 25 files in total.
        assert len(output_files) == 25
    finally:
        os.chdir(original_dir)
コード例 #7
0
ファイル: test_logger.py プロジェクト: sachuin23/whylogs
def test_log_dataframe(tmpdir, df_lending_club):
    """Log the lending-club frame once; check the flat summary and file count."""
    out_dir = tmpdir.mkdir("whylogs")

    local_writer = WriterConfig("local", ["protobuf", "flat"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    with session.logger("lendingclub") as logger:
        assert logger is not None
        logger.log_dataframe(df_lending_club)

        dataset_profile = logger.profile
        assert dataset_profile is not None

        per_column_summary = dataset_profile.flat_summary()['summary']
        assert len(per_column_summary) == 151

    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 5
コード例 #8
0
ファイル: test_log_rotation.py プロジェクト: whylabs/whylogs
def test_no_log_rotation(tmpdir):
    """Build a Logger directly with rotation disabled, log one value, and flush."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # The writer is built from the YAML round-tripped config, not the original.
    reloaded_config = WriterConfig.from_yaml(local_writer.to_yaml())
    base_writer = writer_from_config(reloaded_config)

    plain_logger = Logger(
        session_id="",
        dataset_name="testing",
        writers=[base_writer],
        dataset_timestamp=datetime.datetime.now(tz=timezone.utc),
        with_rotation_time=None,
    )
    plain_logger.log({"quick_test": 3})
    plain_logger.flush()
コード例 #9
0
def test_segments(df_lending_club, tmpdir):
    """Log with two explicit ``home_ownership`` segments and inspect the results.

    Expects no unsegmented profile, exactly two segmented profiles whose
    ``"segment"`` tags are the JSON-encoded RENT and MORTGAGE segments (in
    that order), and ``get_segment`` returning the second profile.
    """

    def ownership_segment(value):
        # Return a fresh list on every call so no object is shared between
        # the logger arguments and the expected values.
        return [{"key": "home_ownership", "value": value}]

    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    config = SessionConfig("project", "pipeline", writers=[local_writer])
    with session_from_config(config) as session:
        with session.logger(
                "test",
                segments=[ownership_segment("RENT"),
                          ownership_segment("MORTGAGE")],
                cache_size=1,
        ) as logger:
            logger.log_dataframe(df_lending_club)
            profile = logger.profile
            profiles = logger.segmented_profiles
            mortage_segment = logger.get_segment(ownership_segment("MORTGAGE"))

    assert profile is None
    assert len(profiles) == 2

    first_profile = profiles[list(profiles.keys())[0]]
    assert first_profile.tags["segment"] == json.dumps(ownership_segment("RENT"))

    second_profile = profiles[list(profiles.keys())[1]]
    assert second_profile.tags["segment"] == json.dumps(ownership_segment("MORTGAGE"))

    check_segment = profiles[list(profiles.keys())[1]]
    assert mortage_segment == check_segment

    shutil.rmtree(out_dir, ignore_errors=True)
コード例 #10
0
ファイル: test_segments.py プロジェクト: whylabs/whylogs
def test_segments_keys(df_lending_club, tmpdir):
    """Segment by ``emp_title`` and ``home_ownership``; expect 47 segmented profiles."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    segment_columns = ["emp_title", "home_ownership"]
    with session.logger("test", segments=segment_columns, cache_size=1) as logger:
        logger.log_dataframe(df_lending_club)
        assert len(logger.segmented_profiles) == 47
    shutil.rmtree(out_dir, ignore_errors=True)
コード例 #11
0
ファイル: test_logger.py プロジェクト: niparis/whylogs-python
def test_config_api(tmpdir):
    """Build a session from config objects, log an empty dataframe, and close it."""
    out_dir = tmpdir.mkdir("whylogs")

    local_writer = WriterConfig("local", ["protobuf", "flat"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    with session.logger("test_name") as logger:
        empty_frame = pd.DataFrame()
        logger.log_dataframe(empty_frame)
    session.close()
コード例 #12
0
ファイル: test_segments.py プロジェクト: whylabs/whylogs
def test_log_multiple_segments(tmpdir):
    """Segment a nine-row frame on ``x`` and ``y``; expect nine segmented profiles."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    frame = pd.DataFrame(
        data={
            "x": [1, 2, 3, 1, 2, 3, 1, 2, 3],
            "y": [4, 5, 6, 5, 6, 4, 6, 4, 5],
            "z": [0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3],
        })
    with session.logger("image_test", segments=["x", "y"]) as logger:
        logger.log_segments(frame)
        segment_count = len(logger.segmented_profiles)
        assert segment_count == 9
コード例 #13
0
def get_or_create_session():
    """
    Return the global session, creating it when none is active.

    When the global session exists and reports itself active, it is returned
    unchanged and no configuration is loaded. Otherwise the configuration is
    loaded with ``load_config``; if that yields ``None``, a default
    configuration with a single local writer is used. The new session
    replaces the global one.

    Returns
    -------
    session : Session
        The global active session
    """
    global _session

    if _session is not None and _session.is_active():
        _getLogger(__name__).debug(
            "Active session found, ignoring session kwargs")
        return _session

    config = load_config()
    if config is None:
        print("WARN: Missing config")
        default_writer = WriterConfig(type="local",
                                      output_path="output",
                                      formats=["all"])
        config = SessionConfig("default-project", "default-pipeline",
                               [default_writer], False)

    _session = session_from_config(config)
    return _session
コード例 #14
0
ファイル: session.py プロジェクト: valer-whylabs/whylogs
def get_or_create_session(path_to_config: Optional[str] = None, report_progress: Optional[bool] = False):
    """
    Return the global session, creating it when none is active.

    When the global session exists and reports itself active, it is returned
    unchanged and both arguments are ignored. Otherwise the configuration is
    loaded with ``load_config(path_to_config)``; if that yields ``None``, a
    default configuration with a local writer and local metadata is used.
    The new session replaces the global one.

    :param path_to_config: Value forwarded to ``load_config``.
    :type path_to_config: str
    :param report_progress: When not ``None``, assigned to
        ``config.report_progress`` before the session is created.
    :type report_progress: bool
    :return: The global active session
    :rtype: Session
    """
    global _session

    if _session is not None and _session.is_active():
        _getLogger(__name__).debug("Active session found, ignoring session kwargs")
        return _session

    config = load_config(path_to_config)
    if config is None:
        print("WARN: Missing config")

        default_writers = [WriterConfig(type="local", output_path="output", formats=["all"])]
        default_metadata = MetadataConfig(type="local", output_path="output", input_path="")
        config = SessionConfig(
            "default-project",
            "default-pipeline",
            default_writers,
            default_metadata,
            False,
        )

    if report_progress is not None:
        config.report_progress = report_progress

    _session = session_from_config(config)
    return _session
コード例 #15
0
ファイル: test_logger.py プロジェクト: valer-whylabs/whylogs
def test_write_template_path():
    """Check that the writer expands the path and filename templates for a profile."""
    dataset_time = time.from_utc_ms(9999)
    session_start = time.from_utc_ms(88888)

    local_writer_config = WriterConfig(
        "local",
        ["protobuf", "flat"],
        "output",
        "$name-$session_timestamp-$dataset_timestamp-$session_id",
        "dataset-profile-$name",
    )
    writer = writer_from_config(local_writer_config)
    dataset_profile = DatasetProfile("name", dataset_time, session_start, session_id="session")

    expected_suffix = "name-88888-9999-session"
    assert writer.path_suffix(dataset_profile) == expected_suffix

    expected_file_name = "dataset-profile-name.txt"
    assert writer.file_name(dataset_profile, ".txt") == expected_file_name
コード例 #16
0
ファイル: test_log_rotation.py プロジェクト: whylabs/whylogs
def test_log_rotation_hour(tmpdir, df):
    """Log with hourly rotation across a three-hour clock jump; expect two files."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    config = SessionConfig("project", "pipeline", writers=[local_writer])
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as clock:
        with session_from_config(config) as session:
            with session.logger("test", with_rotation_time="h", cache_size=1) as logger:
                logger.log_dataframe(df)
                three_hours = datetime.timedelta(hours=3)
                clock.tick(delta=three_hours)
                logger.log(feature_name="E", value=4)
                logger.log_dataframe(df)

    written = [name for _, _, names in os.walk(out_dir) for name in names]
    assert len(written) == 2
    shutil.rmtree(out_dir, ignore_errors=True)
コード例 #17
0
def reset_default_session():
    """
    Close the global session, if any, and replace it with a new one.

    The new session is built from ``load_config()``; when that returns
    ``None``, a default configuration with a single local writer is used.
    """
    global _session

    if _session is not None:
        _session.close()

    config = load_config()
    if config is None:
        default_writer = WriterConfig(type="local", output_path="output", formats=["all"])
        config = SessionConfig("default-project", "default-pipeline",
                               [default_writer], False)

    _session = session_from_config(config)
コード例 #18
0
def test_log_image(tmpdir, image_files):
    """Log images by file path and expect a profile with 19 columns."""
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    session = session_from_config(
        SessionConfig("project", "pipeline", writers=[local_writer]))

    with session.logger("image_test") as logger:
        for path in image_files:
            logger.log_image(path)

        profiled_columns = logger.profile.columns
        assert len(profiled_columns) == 19
    shutil.rmtree(out_dir, ignore_errors=True)
コード例 #19
0
ファイル: test_segments.py プロジェクト: whylabs/whylogs
def test_segments(df_lending_club, tmpdir):
    """Log with two explicit ``home_ownership`` segments and verify their tags.

    Expects no unsegmented profile and exactly two segmented profiles. For
    the first and second profile, every tag whose name starts with
    ``_TAG_PREFIX`` must equal the value of the corresponding requested
    segment. ``get_segment`` must return the second profile.
    """
    out_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(out_dir, ignore_errors=True)

    local_writer = WriterConfig("local", ["protobuf"], out_dir.realpath())
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(local_writer.to_yaml())

    test_segments = [
        [{"key": "home_ownership", "value": "RENT"}],
        [{"key": "home_ownership", "value": "MORTGAGE"}],
    ]

    config = SessionConfig("project", "pipeline", writers=[local_writer])
    with session_from_config(config) as session:
        with session.logger("test", segments=test_segments, cache_size=1) as logger:
            logger.log_dataframe(df_lending_club)
            profile = logger.profile
            profiles = logger.segmented_profiles
            mortage_segment = logger.get_segment(test_segments[1])

    assert profile is None
    assert len(profiles) == 2

    # Position 0 is checked against 'RENT', position 1 against 'MORTGAGE'.
    for position in (0, 1):
        segment_profile = profiles[list(profiles.keys())[position]]
        expected_value = test_segments[position][0][_TAG_VALUE]
        prefixed_tags = [
            tag for tag in segment_profile.tags.keys()
            if tag.startswith(_TAG_PREFIX)
        ]
        for tag in prefixed_tags:
            assert segment_profile.tags[tag] == expected_value

    check_segment = profiles[list(profiles.keys())[1]]
    assert mortage_segment == check_segment
    shutil.rmtree(out_dir, ignore_errors=True)
コード例 #20
0
def test_log_pil_image(tmpdir, image_files):
    """Log PIL images with per-second rotation and expect 19 profile columns.

    Each path in ``image_files`` is opened with PIL and handed to
    ``logger.log_image``.
    """
    output_path = tmpdir.mkdir("whylogs")
    # Directory cleanup is best-effort so that it cannot fail the test.
    shutil.rmtree(output_path, ignore_errors=True)
    writer_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    yaml_data = writer_config.to_yaml()
    # Round-trip the writer config through YAML; the parsed result is unused.
    WriterConfig.from_yaml(yaml_data)

    session_config = SessionConfig("project",
                                   "pipeline",
                                   writers=[writer_config])

    session = session_from_config(session_config)

    with session.logger("image_pil_test", with_rotation_time="s",
                        cache_size=1) as logger:

        for image_file_path in image_files:
            # Image.open keeps the file handle open; the context manager
            # releases it as soon as the image has been logged.
            with Image.open(image_file_path) as img:
                logger.log_image(img)

        profile = logger.profile
        columns = profile.columns
        assert len(columns) == 19
    shutil.rmtree(output_path, ignore_errors=True)
コード例 #21
0
def test_log_rotation_concurrency(tmpdir):
    """Stress log rotation and check that non-empty outputs agree on feature count.

    A large dataframe and then a two-row dataframe are logged with a
    one-second rotation interval, sleeping after each call. Every generated
    file is passed to ``count_features``; files with a positive count must
    all pass ``assert_all_elements_equal``. The logging section is profiled
    with ``cProfile`` and the statistics are sent to ``TEST_LOGGER``.
    """
    log_rotation_interval = "1s"
    sleep_interval = 2

    test_path = tmpdir.mkdir("log_rotation_concurrency_repro")
    writer_config = WriterConfig(
        "local", ["json"],
        test_path.realpath(),
        filename_template="dataset_summary-$dataset_timestamp")

    # Load the full lending club 1000 csv, to get a chance at hitting the bug.
    csv_path = os.path.join(script_dir, "lending_club_1000.csv")
    full_df = pd.read_csv(csv_path)

    # Doubling the frame twice yields a test frame four times the original
    # size. pd.concat is used because DataFrame.append was removed in
    # pandas 2.0.
    for _ in range(2):
        full_df = pd.concat([full_df, full_df])

    TEST_LOGGER.info(f"test dataframe has shape {full_df.shape}")

    # Create a whylogs logging session
    session_config = SessionConfig("project",
                                   "pipeline",
                                   writers=[writer_config])
    session = session_from_config(session_config)

    TEST_LOGGER.info(
        f"Running rotate log test with {log_rotation_interval} flush intervals and {sleep_interval}s pause"
    )
    profiler = cProfile.Profile()
    profiler.enable()
    with session.logger(tags={"datasetId": "model-1"},
                        with_rotation_time=log_rotation_interval) as ylog:
        # Log a larger dataframe to increase chance of rotation before seeing all columns
        ylog.log_dataframe(full_df)
        sleep(sleep_interval)
        # Log a smaller dataframe to get more features before rotation
        ylog.log_dataframe(full_df.head(n=2))
        sleep(sleep_interval)
    profiler.disable()
    stats = pstats.Stats(profiler).sort_stats("cumulative")
    TEST_LOGGER.info(stats.print_stats(10))

    # os.walk reports each file under the directory that contains it, so the
    # full path is always root + file name, whether or not root has
    # subdirectories.
    output_files = [
        os.path.join(root, file_name)
        for root, _subdirs, file_names in os.walk(test_path)
        for file_name in file_names
    ]

    assert len(
        output_files) > 0, "No output files were generated during stress test"
    TEST_LOGGER.debug(f"Generated {len(output_files)} dataset summary files.")

    feature_counts = []
    for filename in output_files:
        # Count once per file and reuse the value.
        feature_count = count_features(filename)
        if feature_count > 0:
            feature_counts.append((feature_count, filename))

    assert len(
        feature_counts
    ) > 0, "feature counts are all empty, we expect some empty files with aggressive log rotation but not all empty!"
    TEST_LOGGER.info(
        f"Feature counts all same, first file with features was {feature_counts[0]}"
    )
    TEST_LOGGER.debug(f"There were {len(feature_counts)} files with features.")
    assert_all_elements_equal(feature_counts)
    rmtree(test_path, ignore_errors=True)
    TEST_LOGGER.debug(f"End cleaning up test directory {test_path}")