def test_no_log_rotation(tmpdir):
    """A logger created with with_rotation_time=None should log and flush once, no rotation."""
    target_dir = tmpdir.mkdir("whylogs")
    shutil.rmtree(target_dir, ignore_errors=True)

    # Round-trip the writer config through YAML to mimic loading it from disk.
    config = WriterConfig("local", ["protobuf"], target_dir.realpath())
    serialized = config.to_yaml()
    local_writer = writer_from_config(WriterConfig.from_yaml(serialized))

    rotationless_logger = Logger(
        session_id="",
        dataset_name="testing",
        writers=[local_writer],
        dataset_timestamp=datetime.datetime.now(tz=timezone.utc),
        with_rotation_time=None,
    )
    rotationless_logger.log({"quick_test": 3})
    rotationless_logger.flush()
def logger(
    self,
    dataset_name: Optional[str] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    session_timestamp: Optional[datetime.datetime] = None,
) -> Logger:
    """
    Return the cached logger for ``dataset_name``, creating one if needed.

    When no dataset name is given, the session's project name is used.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset. Default is the project name.
    dataset_timestamp : datetime.datetime, optional
        Timestamp associated with the dataset (e.g. the batch or window
        being tracked).
    session_timestamp : datetime.datetime, optional
        Override for the session timestamp; normally left unset.

    Returns
    -------
    ylog : whylogs.app.logger.Logger
        WhyLogs logger
    """
    # Fall back to session-level defaults when the caller omits values.
    effective_name = self.project if dataset_name is None else dataset_name
    effective_session_ts = self._session_time if session_timestamp is None else session_timestamp

    if not self._active:
        raise RuntimeError("Session is already closed. Cannot create more loggers")

    existing = self._loggers.get(effective_name)
    if existing is not None:
        return existing

    fresh = Logger(
        dataset_name=effective_name,
        dataset_timestamp=dataset_timestamp,
        session_timestamp=effective_session_ts,
        writers=self.writers,
        verbose=self.verbose,
    )
    self._loggers[effective_name] = fresh
    return fresh
def _create_logger(self, dataset_name: Optional[str] = None):
    """
    Create a whylogs Logger bound to the currently active MLflow run.

    Returns None when no MLflow run is active (closing the logger left
    over from a previous run, if any). When the active run's ID differs
    from the one we last saw, the previous logger is closed and state is
    reset before building a fresh Logger from the run's metadata.

    Parameters
    ----------
    dataset_name : str, optional
        Name for the dataset. Defaults to the MLflow experiment name.

    Returns
    -------
    Logger or None
        A new Logger for the active run, or None if no run is active.
    """
    active_run = _mlflow.active_run()
    if active_run is None:
        # BUG FIX: the original guard only returned early when a run had
        # previously been active (self._active_run_id is not None); with
        # no prior run, a None active_run fell through and raised
        # AttributeError on ``active_run.info`` below.
        if self._active_run_id is not None:
            self._close()
            self._active_run_id = None
        return None

    run_info = active_run.info
    if run_info.run_id != self._active_run_id:
        logger.debug(
            "Detect a new run ID. Previous run ID: %s. New run ID: %s.",
            self._active_run_id,
            run_info.run_id,
        )
        self._close()
        self._active_run_id = run_info.run_id

    # MLflow reports start_time in milliseconds since the epoch.
    session_timestamp = datetime.datetime.utcfromtimestamp(run_info.start_time / 1000.0)
    experiment: _mlflow.entities.Experiment = _mlflow.tracking.MlflowClient().get_experiment(run_info.experiment_id)
    logger_dataset_name = dataset_name or experiment.name

    tags = dict(active_run.data.tags)
    # NOTE(review): "mflow" looks like a typo for "mlflow", but these are
    # runtime tag keys that downstream consumers may depend on — preserved.
    tags["mflow.experiment_id"] = active_run.info.experiment_id
    tags["mflow.run_id"] = active_run.info.run_id

    logger.debug(
        "Creating a new logger for dataset name: %s. Tags: %s",
        logger_dataset_name,
        tags,
    )
    return Logger(
        run_info.run_id,
        logger_dataset_name,
        session_timestamp=session_timestamp,
        dataset_timestamp=session_timestamp,
        tags=tags,
        writers=[],
    )
def logger(
    self,
    dataset_name: Optional[str] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    session_timestamp: Optional[datetime.datetime] = None,
    tags: Dict[str, str] = None,
    metadata: Dict[str, str] = None,
) -> Logger:
    """
    Return the cached logger for ``dataset_name``, creating one if needed.

    When no dataset name is given, the session's project name is used.
    Inactive (closed) loggers are evicted from the cache before lookup.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset. Default is the project name.
    dataset_timestamp : datetime.datetime, optional
        Timestamp associated with the dataset (e.g. the batch or window
        being tracked).
    session_timestamp : datetime.datetime, optional
        Override for the session timestamp; normally left unset.
    tags : dict
        Groupable information to tag the data with, e.g. stage
        (development, testing, production etc...).
    metadata : dict
        Non-groupable debugging information such as hostname.

    Returns
    -------
    ylog : whylogs.app.logger.Logger
        whylogs logger
    """
    if not self._active:
        raise RuntimeError("Session is already closed. Cannot create more loggers")

    effective_name = self.project if dataset_name is None else dataset_name
    effective_session_ts = self._session_time if session_timestamp is None else session_timestamp

    # Evict closed loggers so they are never handed back to callers.
    for stale_name in [n for n, lg in self._loggers.items() if not lg.is_active()]:
        self._loggers.pop(stale_name)

    existing = self._loggers.get(effective_name)
    if existing is not None:
        return existing

    fresh = Logger(
        session_id=self._session_id,
        dataset_name=effective_name,
        dataset_timestamp=dataset_timestamp,
        session_timestamp=effective_session_ts,
        writers=self.writers,
        tags=tags,
        metadata=metadata,
        verbose=self.verbose,
    )
    self._loggers[effective_name] = fresh
    return fresh
def logger(
    self,
    dataset_name: Optional[str] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    session_timestamp: Optional[datetime.datetime] = None,
    tags: Dict[str, str] = None,
    metadata: Dict[str, str] = None,
    segments: Optional[Union[List[Dict], List[str], str]] = None,
    profile_full_dataset: bool = False,
    with_rotation_time: str = None,
    cache_size: int = 1,
    constraints: DatasetConstraints = None,
) -> Logger:
    """
    Return an existing logger for this parameter combination, or create one.

    If no dataset_name is specified, we default to project name.

    Args:
        dataset_name: name of the dataset
        dataset_timestamp: timestamp of the dataset. Default to now
        session_timestamp: timestamp of the session. Inherits from the session
        tags: metadata associated with the profile
        metadata: same as tags. Will be deprecated
        segments: slice of data that the profile belongs to
        profile_full_dataset: when segmenting dataset, an option to keep the
            full unsegmented profile of the dataset
        with_rotation_time: rotation time in minutes or hours ("1m", "1h")
        cache_size: size of the segment cache
        constraints: whylogs constraints to monitor against
    """
    if not self._active:
        raise RuntimeError("Session is already closed. Cannot create more loggers")

    # Loggers are cached per unique parameter combination, not just per name.
    cache_key = str(
        _LoggerKey(
            dataset_name=dataset_name,
            dataset_timestamp=dataset_timestamp,
            session_timestamp=session_timestamp,
            tags=tags,
            metadata=metadata,
            segments=segments,
            profile_full_dataset=profile_full_dataset,
            with_rotation_time=with_rotation_time,
            cache_size=cache_size,
            constraints=constraints,
        )
    )

    cached = self._loggers.get(cache_key)
    if cached is not None and cached.is_active():
        return cached

    # No logger for this key, or the cached one was closed: build a
    # replacement, filling unset values from the session defaults.
    fresh = Logger(
        session_id=self._session_id,
        dataset_name=dataset_name or self.project,
        dataset_timestamp=dataset_timestamp,
        session_timestamp=session_timestamp or self._session_time,
        writers=self.writers,
        metadata_writer=self.metadata_writer,
        tags=tags or {},
        metadata=metadata,
        verbose=self.verbose,
        with_rotation_time=with_rotation_time or self.with_rotation_time,
        segments=segments,
        profile_full_dataset=profile_full_dataset,
        cache_size=cache_size,
        constraints=constraints,
    )
    self._loggers[cache_key] = fresh
    return fresh
def test_log_rotation_parsing():
    """Rotation specs ("s", "10m", "2d", ...) parse to the right interval; bad specs raise."""
    with freeze_time("2012-01-14 03:21:34", tz_offset=-4) as frozen_time:
        rotating_logger = Logger(session_id="", dataset_name="testing")
        now = int(datetime.datetime.utcnow().timestamp())

        # (spec, expected interval in seconds) — unit-only and count-prefixed forms.
        expected_intervals = [
            ("s", 1),
            ("m", 60),
            ("h", 60 * 60),
            ("d", 24 * 60 * 60),
            ("30s", 30),
            ("10m", 10 * 60),
            ("10h", 10 * 60 * 60),
            ("2d", 2 * 24 * 60 * 60),
        ]
        for spec, seconds in expected_intervals:
            rotating_logger._set_rotation(with_rotation_time=spec)
            assert rotating_logger.interval == seconds
            assert rotating_logger.rotate_at == now + seconds

        # make sure bogus specifications get flagged.
        for bad_spec in ("-2d", "2", "s2"):
            with pytest.raises(TypeError):
                rotating_logger._set_rotation(with_rotation_time=bad_spec)
def logger(
    self,
    dataset_name: Optional[str] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    session_timestamp: Optional[datetime.datetime] = None,
    tags: Dict[str, str] = None,
    metadata: Dict[str, str] = None,
    segments: Optional[Union[List[Dict], List[str], str]] = None,
    profile_full_dataset: bool = False,
    with_rotation_time: str = None,
    cache_size: int = 1,
    constraints: DatasetConstraints = None,
) -> Logger:
    """
    Return the cached logger for ``dataset_name``, creating one if needed.

    When no dataset name is given, the session's project name is used.
    Inactive (closed) loggers are evicted from the cache before lookup.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset. Default is the project name.
    dataset_timestamp : datetime.datetime, optional
        Timestamp associated with the dataset (e.g. the batch or window
        being tracked).
    session_timestamp : datetime.datetime, optional
        Override for the session timestamp; normally left unset.
    tags : dict
        Groupable information to tag the data with, e.g. stage
        (development, testing, production etc...).
    metadata : dict
        Non-groupable debugging information such as hostname.
    segments :
        Can be either:
        - Autosegmentation source, one of ["auto", "local"]
        - List of tag key value pairs for tracking data segments
        - List of tag keys for which we will track every value
        - None, no segments will be used

    Returns
    -------
    ylog : whylogs.app.logger.Logger
        whylogs logger
    """
    if tags is None:
        tags = {}
    if not self._active:
        raise RuntimeError("Session is already closed. Cannot create more loggers")

    # Fall back to session-level defaults when the caller omits values.
    effective_name = self.project if dataset_name is None else dataset_name
    effective_session_ts = self._session_time if session_timestamp is None else session_timestamp
    effective_rotation = self.with_rotation_time if with_rotation_time is None else with_rotation_time

    # Evict closed loggers so they are never handed back to callers.
    for stale_name in [n for n, lg in self._loggers.items() if not lg.is_active()]:
        self._loggers.pop(stale_name)

    existing = self._loggers.get(effective_name)
    if existing is not None:
        return existing

    fresh = Logger(
        session_id=self._session_id,
        dataset_name=effective_name,
        dataset_timestamp=dataset_timestamp,
        session_timestamp=effective_session_ts,
        writers=self.writers,
        tags=tags,
        metadata=metadata,
        verbose=self.verbose,
        with_rotation_time=effective_rotation,
        segments=segments,
        profile_full_dataset=profile_full_dataset,
        cache_size=cache_size,
        constraints=constraints,
    )
    self._loggers[effective_name] = fresh
    return fresh