def get_or_create_session(path_to_config: Optional[str] = None, report_progress: Optional[bool] = False):
    """
    Return the global session, creating it first when none is active.

    When the module-level session exists and reports itself active, it is
    returned unchanged and both arguments are ignored. Otherwise the
    configuration is loaded with ``load_config(path_to_config)``; if that
    yields ``None`` a warning is printed and a built-in default configuration
    (local writer and local metadata, both targeting ``output``) is used
    instead. The new session replaces the module-level one.

    :param path_to_config: Path forwarded to ``load_config``
    :type path_to_config: str
    :param report_progress: When not ``None``, overrides ``report_progress``
        on the configuration before the session is built
    :type report_progress: bool
    :return: The global active session
    :rtype: Session
    """
    global _session

    session_is_live = _session is not None and _session.is_active()
    if session_is_live:
        _getLogger(__name__).debug("Active session found, ignoring session kwargs")
        return _session

    session_config = load_config(path_to_config)
    if session_config is None:
        print("WARN: Missing config")
        fallback_writer = WriterConfig(type="local", output_path="output", formats=["all"])
        fallback_metadata = MetadataConfig(type="local", output_path="output", input_path="")
        session_config = SessionConfig(
            "default-project",
            "default-pipeline",
            [fallback_writer],
            fallback_metadata,
            False,
        )

    # An explicit None leaves whatever the configuration already specifies.
    if report_progress is not None:
        session_config.report_progress = report_progress

    _session = session_from_config(session_config)
    return _session
def test_s3_writer_metadata(df_lending_club, moto_boto, s3_all_config_metadata_path):
    """
    Check the keys found in ``mocked_bucket`` after segment estimation and
    again after a dataframe has been logged and the session closed.
    """
    assert os.path.exists(s3_all_config_metadata_path)

    session = session_from_config(load_config(s3_all_config_metadata_path))
    session.estimate_segments(
        df_lending_club,
        name="dataset_test",
        target_field="funded_amnt_inv",
        max_segments=30,
    )

    s3_client = boto3.client("s3")

    # After estimating segments, every listed object must be the segments file.
    listing = s3_client.list_objects(Bucket="mocked_bucket")
    for entry in listing["Contents"]:
        assert entry["Key"] == "metadata/segments.json"

    with session.logger("dataset_test") as logger:
        logger.log_dataframe(df_lending_club)
    session.close()

    # After logging, the listing is compared position by position with the
    # expected keys.
    listing = s3_client.list_objects(Bucket="mocked_bucket")
    print(listing)
    for position, entry in enumerate(listing["Contents"]):
        found_key = entry["Key"]
        print(found_key)
        assert found_key == object_keys_meta_config[position]
def get_or_create_session():
    """
    Return the global session, creating it first when none is active.

    If the module-level session exists and reports itself active, it is
    returned as is. Otherwise the configuration is read with
    ``load_config()``; when that yields ``None`` a warning is printed and a
    default configuration with a single local writer targeting ``output`` is
    used. The new session replaces the module-level one.

    Returns
    -------
    session : Session
        The global active session
    """
    global _session

    session_is_live = _session is not None and _session.is_active()
    if session_is_live:
        _getLogger(__name__).debug("Active session found, ignoring session kwargs")
        return _session

    session_config = load_config()
    if session_config is None:
        print("WARN: Missing config")
        fallback_writer = WriterConfig(type="local", output_path="output", formats=["all"])
        session_config = SessionConfig(
            "default-project",
            "default-pipeline",
            [fallback_writer],
            False,
        )

    _session = session_from_config(session_config)
    return _session
def reset_default_session():
    """
    Reset and deactivate the global whylogs logging session.

    Closes the current module-level session when there is one, then reloads
    the configuration with ``load_config()`` and installs a new session built
    from it.

    Raises
    ------
    EnvironmentError
        If ``load_config()`` returns ``None``. The previous session has
        already been closed by then.
    """
    global _session

    previous_session = _session
    if previous_session is not None:
        previous_session.close()

    fresh_config: SessionConfig = load_config()
    if fresh_config is None:
        raise EnvironmentError("Unable to load whylogs config")

    _session = session_from_config(fresh_config)
def reset_default_session():
    """
    Reset and deactivate the global whylogs logging session.

    Closes the current module-level session when there is one, then reloads
    the configuration with ``load_config()`` and installs a new session built
    from it. When no configuration can be loaded, a default one with a single
    local writer targeting ``output`` is used instead.
    """
    global _session

    previous_session = _session
    if previous_session is not None:
        previous_session.close()

    fresh_config: SessionConfig = load_config()
    if fresh_config is None:
        default_writer = WriterConfig(type="local", output_path="output", formats=["all"])
        fresh_config = SessionConfig(
            "default-project",
            "default-pipeline",
            [default_writer],
            False,
        )

    _session = session_from_config(fresh_config)
def test_s3_writer_transport_config(s3_transport_config_path):
    """
    Check that the first writer's transport parameters loaded from the config
    file carry the expected endpoint, credentials, region and verify values.
    """
    assert os.path.exists(s3_transport_config_path)

    loaded_config = load_config(s3_transport_config_path)
    transport = loaded_config.writers[0].transport_parameters

    assert transport.endpoint_url == "http://127.0.0.1:5000/"
    assert transport.aws_access_key_id == AWS_ACCESS_KEY_ID
    assert transport.aws_secret_access_key == AWS_SECRET_ACCESS_KEY
    assert transport.region_name == DEFAULT_REGION_NAME
    assert transport.verify == "path/to/file"
def test_s3_writer(df_lending_club, moto_boto, s3_all_config_path):
    """
    Log a dataframe through a session built from the config file and compare
    the keys listed in ``mocked_bucket`` with the expected ones, in order.
    """
    assert os.path.exists(s3_all_config_path)

    session = session_from_config(load_config(s3_all_config_path))
    with session.logger("dataset_test_s3") as logger:
        logger.log_dataframe(df_lending_club)
    session.close()

    listing = boto3.client("s3").list_objects(Bucket="mocked_bucket")
    for position, entry in enumerate(listing["Contents"]):
        assert entry["Key"] == object_keys[position]
def test_mlflow_patched(mlflow_config_path):
    """
    Check that enabling the mlflow integration succeeds and exposes
    ``mlflow.whylogs``, then disable the integration again.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    patched = whylogs.enable_mlflow(session)
    assert patched
    assert mlflow.whylogs is not None

    print("HEY LISTEN")
    whylogs.mlflow.disable_mlflow()
def test_profile_viewer(tmpdir, local_config_path):
    """
    Log a few records, render the resulting profile with ``profile_viewer``
    and check that the output file exists and that its path is returned.
    """
    session = session_from_config(load_config(local_config_path))

    # NOTE(review): this relies on the `+` operator of the tmpdir fixture. If
    # tmpdir is a py.path.local, that appends to the directory's basename
    # rather than joining a child path. Confirm this is intended.
    html_path = tmpdir + "my_test.html"

    timestamp = datetime.datetime(2021, 6, 2)
    with session.logger("mytestytest", dataset_timestamp=timestamp) as logger:
        for _ in range(5):
            logger.log({"uniform_integers": np.random.randint(0, 50)})
            logger.log({"nulls": None})
        profile = logger.profile

    result = profile_viewer(profiles=[profile], output_path=html_path)

    assert os.path.exists(html_path)
    assert result == html_path
def test_s3_writer(df_lending_club, moto_boto, s3_config_path):
    """
    Log a dataframe through a session built from the config file and check
    that ``mocked_bucket`` holds exactly the dataset summary object, and that
    no directory named ``s3:`` appeared in the current working directory.
    """
    assert os.path.exists(s3_config_path)

    session = session_from_config(load_config(s3_config_path))
    with session.logger("dataset_test_s3") as logger:
        logger.log_dataframe(df_lending_club)

    listing = boto3.client("s3").list_objects(Bucket="mocked_bucket")
    stored_keys = [entry["Key"] for entry in listing["Contents"]]

    assert len(stored_keys) == 1
    assert stored_keys[0] == "dataset_test_s3/dataset_summary/protobuf/dataset_summary.bin"

    # The S3 URL must not have been treated as a local path.
    local_dir_names = [entry.name for entry in os.scandir(os.getcwd()) if entry.is_dir()]
    assert "s3:" not in local_dir_names
def test_get_run_profiles_shouldReturn_multipleProfiles(tmpdir, mlflow_config_path):
    """
    Log profiles under two dataset names across two mlflow runs and check
    what ``get_run_profiles`` and ``get_experiment_profiles`` return.

    The mlflow patch installed by ``whylogs.enable_mlflow`` is always removed
    again on exit, including when an assertion fails, so the patched mlflow
    module does not leak into tests that run afterwards.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    config = load_config(mlflow_config_path)
    session = session_from_config(config)

    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)
    try:
        with mlflow.start_run():
            mlflow.whylogs.log(features={"a": 1})
            mlflow.whylogs.log(features={"a": 1}, dataset_name="another-profile")

        with mlflow.start_run():
            mlflow.whylogs.log(features={"a": 1}, dataset_name="another-profile")

        runs = whylogs.mlflow.list_whylogs_runs("0")
        first_run_id = runs[0].run_id
        default_profiles = whylogs.mlflow.get_run_profiles(run_id=first_run_id)
        another_profile = whylogs.mlflow.get_run_profiles(
            run_id=first_run_id, dataset_name="another-profile"
        )

        assert len(runs) == 2

        # verify the number of profiles for each dataset name
        assert len(whylogs.mlflow.get_experiment_profiles("0", dataset_name="default")) == 2
        assert len(whylogs.mlflow.get_experiment_profiles("0", dataset_name="another-profile")) == 2

        # for the first run, verify content
        assert len(default_profiles) == 1
        assert len(another_profile) == 1
        # assert default_profiles[0].name == "default"
        assert default_profiles[0].dataset_timestamp is not None
        assert another_profile[0].dataset_timestamp is not None
    finally:
        # Undo the global mlflow patch regardless of the test outcome.
        whylogs.mlflow.disable_mlflow()
def test_patch_multiple_times(mlflow_config_path):
    """
    Check that enabling the mlflow integration repeatedly keeps succeeding
    and still leaves ``mlflow.whylogs`` available.
    """
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))

    # patch three times
    for _ in range(3):
        assert whylogs.enable_mlflow(session)

    import mlflow

    assert mlflow.whylogs is not None
    whylogs.mlflow.disable_mlflow()
def test_assert_whylogsrun_close_is_called(tmpdir, mlflow_config_path):
    """
    Check that ``WhyLogsRun._close`` is invoked exactly once over the course
    of a single mlflow run started with the integration enabled.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))
    set_up_mlflow(mlflow, tmpdir)

    run_class = whylogs.mlflow.patcher.WhyLogsRun
    with mock.patch.object(run_class, "_close") as close_spy:
        whylogs.enable_mlflow(session)
        with mlflow.start_run():
            pass

        close_spy.assert_called_once()

    whylogs.mlflow.disable_mlflow()
def test_assert_log_artifact_is_called(tmpdir, mlflow_config_path):
    """
    Check that ``mlflow.log_artifact`` is invoked exactly once for an mlflow
    run in which one set of features is logged through ``mlflow.whylogs``.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))
    set_up_mlflow(mlflow, tmpdir)

    with mock.patch.object(mlflow, "log_artifact") as artifact_spy:
        whylogs.enable_mlflow(session)
        with mlflow.start_run():
            mlflow.whylogs.log(features={"a": 1})

        artifact_spy.assert_called_once()

    whylogs.mlflow.disable_mlflow()
def test_listRuns_shouldReturn_NoRuns(tmpdir, mlflow_config_path):
    """
    Start ten mlflow runs without logging anything through whylogs and check
    that mlflow reports ten runs while whylogs reports none.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))
    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)

    for _ in range(10):
        with mlflow.start_run():
            pass

    mlflow_run_count = len(mlflow.list_run_infos("0"))
    assert mlflow_run_count == 10

    whylogs_run_count = len(whylogs.mlflow.list_whylogs_runs("0"))
    assert whylogs_run_count == 0

    whylogs.mlflow.disable_mlflow()
def session_from_config(config: Optional[SessionConfig] = None, config_path: Optional[str] = "") -> Session:
    """
    Construct a whylogs session from a `SessionConfig` or from a config_path

    :param config: Configuration to build the session from. When ``None``,
        the configuration is loaded with ``load_config(config_path)``.
    :type config: SessionConfig
    :param config_path: Path forwarded to ``load_config``; only used when
        ``config`` is ``None``
    :type config_path: str
    :return: A session built from the configuration's project, pipeline,
        writers, optional metadata writer and remaining settings
    :rtype: Session
    :raises EnvironmentError: If no ``config`` was given and ``load_config``
        returns ``None``
    """
    if config is None:
        config = load_config(config_path)
        if config is None:
            # Fail with an explicit message instead of an AttributeError on
            # `None.writers` further down.
            raise EnvironmentError("Unable to load whylogs config")

    writers = [writer_from_config(writer_config) for writer_config in config.writers]

    # A metadata writer is only built when the configuration declares one.
    metadata_writer = metadata_from_config(config.metadata) if config.metadata else None

    return Session(
        config.project,
        config.pipeline,
        writers,
        metadata_writer,
        config.verbose,
        config.with_rotation_time,
        config.cache_size,
        report_progress=config.report_progress,
    )
def test_listRuns_shouldReturn_CorrectRunCount(tmpdir, mlflow_config_path):
    """
    Start ten mlflow runs, logging through whylogs in every second one, and
    check that mlflow reports ten runs while whylogs reports five runs and
    five experiment profiles.
    """
    import mlflow
    import whylogs
    from whylogs.app.config import load_config
    from whylogs.app.session import session_from_config

    assert os.path.exists(mlflow_config_path)

    session = session_from_config(load_config(mlflow_config_path))
    set_up_mlflow(mlflow, tmpdir)
    whylogs.enable_mlflow(session)

    for run_index in range(10):
        with mlflow.start_run():
            # Only even-numbered runs log anything through whylogs.
            if run_index % 2 == 0:
                mlflow.whylogs.log(features={"a": 1})
    print("WEIRD")

    mlflow_run_count = len(mlflow.list_run_infos("0"))
    assert mlflow_run_count == 10

    whylogs_run_count = len(whylogs.mlflow.list_whylogs_runs("0"))
    assert whylogs_run_count == 5

    profile_count = len(whylogs.mlflow.get_experiment_profiles("0"))
    assert profile_count == 5

    whylogs.mlflow.disable_mlflow()