Esempio n. 1
0
def test_no_log_rotation(tmpdir):
    """Smoke test: a Logger created with rotation disabled can log and flush.

    The writer configuration is round-tripped through YAML before the writer
    is built, so the (de)serialisation path is exercised as well.
    """
    output_path = tmpdir.mkdir("whylogs")
    # The freshly created directory is removed again; failures are ignored.
    shutil.rmtree(output_path, ignore_errors=True)

    original_config = WriterConfig("local", ["protobuf"], output_path.realpath())
    restored_config = WriterConfig.from_yaml(original_config.to_yaml())
    writer = writer_from_config(restored_config)

    rotation_free_logger = Logger(
        session_id="",
        dataset_name="testing",
        writers=[writer],
        dataset_timestamp=datetime.datetime.now(tz=timezone.utc),
        with_rotation_time=None,
    )
    rotation_free_logger.log({"quick_test": 3})
    rotation_free_logger.flush()
Esempio n. 2
0
    def logger(
        self,
        dataset_name: Optional[str] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        session_timestamp: Optional[datetime.datetime] = None,
    ) -> Logger:
        """
        Return the session's logger for a dataset, creating it on first use.

        Loggers are cached per dataset name, so repeated calls with the same
        name hand back the same object.

        Parameters
        ----------
        dataset_name : str, optional
            Name of the dataset. Falls back to the session's project name.
        dataset_timestamp : datetime.datetime, optional
            Timestamp associated with the dataset, e.g. the batch time or the
            time of the window being tracked.
        session_timestamp : datetime.datetime, optional
            Override for the session timestamp. Falls back to the session's
            own time; overriding it is normally unnecessary.

        Returns
        -------
        ylog : whylogs.app.logger.Logger
            WhyLogs logger

        Raises
        ------
        RuntimeError
            If the session has already been closed.
        """
        # Fall back to the project name when no dataset name is supplied.
        name = self.project if dataset_name is None else dataset_name
        session_time = (
            self._session_time if session_timestamp is None else session_timestamp
        )

        if not self._active:
            raise RuntimeError(
                "Session is already closed. Cannot create more loggers")

        existing = self._loggers.get(name)
        if existing is not None:
            return existing

        created = Logger(
            dataset_name=name,
            dataset_timestamp=dataset_timestamp,
            session_timestamp=session_time,
            writers=self.writers,
            verbose=self.verbose,
        )
        self._loggers[name] = created
        return created
Esempio n. 3
0
    def _create_logger(self, dataset_name: Optional[str] = None):
        """
        Build a Logger bound to the currently active MLflow run.

        Parameters
        ----------
        dataset_name : str, optional
            Dataset name for the new logger. Falls back to the name of the
            run's experiment when omitted or empty.

        Returns
        -------
        Logger or None
            A new Logger whose session id is the run id, or None when MLflow
            reports no active run.
        """
        active_run = _mlflow.active_run()

        if active_run is None:
            # Nothing to attach to. Previously this only returned early when a
            # run had been tracked; with no tracked run it fell through and
            # crashed on `active_run.info`.
            if self._active_run_id is not None:
                # The run we were tracking has ended: release it.
                self._close()
                self._active_run_id = None
            return None

        run_info = active_run.info
        if run_info.run_id != self._active_run_id:
            logger.debug(
                "Detect a new run ID. Previous run ID: %s. New run ID: %s.",
                self._active_run_id,
                run_info.run_id,
            )
            # Close whatever belonged to the previous run before switching.
            self._close()
            self._active_run_id = run_info.run_id

        # start_time is divided by 1000 to get seconds (presumably it is in
        # milliseconds since the epoch); the result is a naive UTC datetime.
        session_timestamp = datetime.datetime.utcfromtimestamp(
            run_info.start_time / 1000.0
        )
        experiment: _mlflow.entities.Experiment = _mlflow.tracking.MlflowClient().get_experiment(
            run_info.experiment_id
        )
        logger_dataset_name = dataset_name or experiment.name

        # Copy the run's tags so the additions below don't touch the run object.
        tags = dict(active_run.data.tags)
        # NOTE(review): the keys use an "mflow." prefix (not "mlflow."); kept
        # unchanged because downstream consumers may rely on the exact keys.
        tags["mflow.experiment_id"] = run_info.experiment_id
        tags["mflow.run_id"] = run_info.run_id
        logger.debug(
            "Creating a new logger for dataset name: %s. Tags: %s",
            logger_dataset_name,
            tags,
        )
        return Logger(
            run_info.run_id,
            logger_dataset_name,
            session_timestamp=session_timestamp,
            dataset_timestamp=session_timestamp,
            tags=tags,
            writers=[],
        )
Esempio n. 4
0
    def logger(
        self,
        dataset_name: Optional[str] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        session_timestamp: Optional[datetime.datetime] = None,
        tags: Dict[str, str] = None,
        metadata: Dict[str, str] = None,
    ) -> Logger:
        """
        Return the session's logger for a dataset, creating it on first use.

        Inactive loggers are evicted from the session cache on every call, so
        a dataset whose logger is no longer active gets a fresh one.

        Parameters
        ----------
        dataset_name : str, optional
            Name of the dataset. Falls back to the session's project name.
        dataset_timestamp : datetime.datetime, optional
            Timestamp associated with the dataset, e.g. the batch time or the
            time of the window being tracked.
        session_timestamp : datetime.datetime, optional
            Override for the session timestamp. Falls back to the session's
            own time; overriding it is normally unnecessary.
        tags : dict, optional
            Groupable information attached to the data, for example the stage
            (development, testing, production, ...).
        metadata : dict, optional
            Non-groupable information useful for debugging the data source,
            such as a hostname.

        Returns
        -------
        ylog : whylogs.app.logger.Logger
            whylogs logger

        Raises
        ------
        RuntimeError
            If the session has already been closed.
        """
        if not self._active:
            raise RuntimeError(
                "Session is already closed. Cannot create more loggers")

        # Fall back to the project name when no dataset name is supplied.
        name = self.project if dataset_name is None else dataset_name
        session_time = (
            self._session_time if session_timestamp is None else session_timestamp
        )

        # Evict loggers that are no longer active before looking anything up.
        inactive_names = [
            cached_name
            for cached_name, cached in self._loggers.items()
            if not cached.is_active()
        ]
        for cached_name in inactive_names:
            self._loggers.pop(cached_name)

        existing = self._loggers.get(name)
        if existing is not None:
            return existing

        created = Logger(
            session_id=self._session_id,
            dataset_name=name,
            dataset_timestamp=dataset_timestamp,
            session_timestamp=session_time,
            writers=self.writers,
            tags=tags,
            metadata=metadata,
            verbose=self.verbose,
        )
        self._loggers[name] = created
        return created
Esempio n. 5
0
    def logger(
        self,
        dataset_name: Optional[str] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        session_timestamp: Optional[datetime.datetime] = None,
        tags: Dict[str, str] = None,
        metadata: Dict[str, str] = None,
        segments: Optional[Union[List[Dict], List[str], str]] = None,
        profile_full_dataset: bool = False,
        with_rotation_time: str = None,
        cache_size: int = 1,
        constraints: DatasetConstraints = None,
    ) -> Logger:
        """
        Return a cached logger for the given configuration, or create one.

        Loggers are cached under a key derived from every argument as passed
        in, so calls with identical arguments share a logger while that
        logger is still active. An inactive cached logger is replaced.

        Args:
            dataset_name: name of the dataset. Defaults to the project name
            dataset_timestamp: timestamp of the dataset
            session_timestamp: timestamp of the session. Inherits from the session
            tags: metadata associated with the profile
            metadata: same as tags. Will be deprecated
            segments: slice of data that the profile belongs to
            profile_full_dataset: when segmenting dataset, an option to keep the
                full unsegmented profile of the dataset
            with_rotation_time: rotation time in minutes or hours ("1m", "1h").
                Defaults to the session's rotation time
            cache_size: size of the segment cache
            constraints: whylogs constraints to monitor against

        Returns:
            The cached or newly created whylogs logger.

        Raises:
            RuntimeError: if the session has already been closed.
        """
        if not self._active:
            raise RuntimeError("Session is already closed. Cannot create more loggers")

        # The key is built from the raw arguments, before defaults are applied.
        key_fields = _LoggerKey(
            dataset_name=dataset_name,
            dataset_timestamp=dataset_timestamp,
            session_timestamp=session_timestamp,
            tags=tags,
            metadata=metadata,
            segments=segments,
            profile_full_dataset=profile_full_dataset,
            with_rotation_time=with_rotation_time,
            cache_size=cache_size,
            constraints=constraints,
        )
        cache_key = str(key_fields)

        cached = self._loggers.get(cache_key)
        if cached is not None and cached.is_active():
            return cached

        # No usable logger under this key: build one and (re)register it.
        created = Logger(
            session_id=self._session_id,
            dataset_name=dataset_name or self.project,
            dataset_timestamp=dataset_timestamp,
            session_timestamp=session_timestamp or self._session_time,
            writers=self.writers,
            metadata_writer=self.metadata_writer,
            tags=tags or {},
            metadata=metadata,
            verbose=self.verbose,
            with_rotation_time=with_rotation_time or self.with_rotation_time,
            segments=segments,
            profile_full_dataset=profile_full_dataset,
            cache_size=cache_size,
            constraints=constraints,
        )
        self._loggers[cache_key] = created
        return created
Esempio n. 6
0
def test_log_rotation_parsing():
    """Check how Logger._set_rotation parses rotation specifications.

    Every valid spec must yield the expected interval in seconds and schedule
    the next rotation exactly one interval after the (frozen) current time.
    Malformed specs must raise TypeError.
    """
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    valid_specs = [
        ("s", 1),
        ("m", minute),
        ("h", hour),
        ("d", day),
        ("30s", 30),
        ("10m", 10 * minute),
        ("10h", 10 * hour),
        ("2d", 2 * day),
    ]
    bogus_specs = ("-2d", "2", "s2")

    with freeze_time("2012-01-14 03:21:34", tz_offset=-4):
        rotating_logger = Logger(session_id="", dataset_name="testing")
        now = int(datetime.datetime.utcnow().timestamp())

        for spec, expected_interval in valid_specs:
            rotating_logger._set_rotation(with_rotation_time=spec)
            assert rotating_logger.interval == expected_interval
            assert rotating_logger.rotate_at == now + rotating_logger.interval

        # make sure bogus specifications get flagged.
        for spec in bogus_specs:
            with pytest.raises(TypeError):
                rotating_logger._set_rotation(with_rotation_time=spec)
Esempio n. 7
0
    def logger(
        self,
        dataset_name: Optional[str] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        session_timestamp: Optional[datetime.datetime] = None,
        tags: Dict[str, str] = None,
        metadata: Dict[str, str] = None,
        segments: Optional[Union[List[Dict], List[str], str]] = None,
        profile_full_dataset: bool = False,
        with_rotation_time: str = None,
        cache_size: int = 1,
        constraints: DatasetConstraints = None,
    ) -> Logger:
        """
        Return the session's logger for a dataset, creating it on first use.

        Loggers are cached per dataset name only; the remaining arguments are
        used solely when a new logger has to be created. Inactive loggers are
        evicted from the cache on every call.

        Parameters
        ----------
        dataset_name : str, optional
            Name of the dataset. Falls back to the session's project name.
        dataset_timestamp : datetime.datetime, optional
            Timestamp associated with the dataset, e.g. the batch time or the
            time of the window being tracked.
        session_timestamp : datetime.datetime, optional
            Override for the session timestamp. Falls back to the session's
            own time; overriding it is normally unnecessary.
        tags : dict, optional
            Groupable information attached to the data, for example the stage
            (development, testing, production, ...). Defaults to an empty dict.
        metadata : dict, optional
            Non-groupable information useful for debugging the data source,
            such as a hostname.
        segments : optional
            Can be either:
            - Autosegmentation source, one of ["auto", "local"]
            - List of tag key value pairs for tracking data segments
            - List of tag keys for which we will track every value
            - None, no segments will be used
        profile_full_dataset : bool
            Forwarded unchanged to the Logger constructor.
        with_rotation_time : str, optional
            Rotation setting forwarded to the Logger constructor. Falls back
            to the session's own rotation setting.
        cache_size : int
            Forwarded unchanged to the Logger constructor.
        constraints : DatasetConstraints, optional
            Forwarded unchanged to the Logger constructor.

        Returns
        -------
        ylog : whylogs.app.logger.Logger
            whylogs logger

        Raises
        ------
        RuntimeError
            If the session has already been closed.
        """
        effective_tags = {} if tags is None else tags

        if not self._active:
            raise RuntimeError(
                "Session is already closed. Cannot create more loggers")

        # Fall back to session-level settings for anything left unspecified.
        name = self.project if dataset_name is None else dataset_name
        session_time = (
            self._session_time if session_timestamp is None else session_timestamp
        )
        rotation = (
            self.with_rotation_time if with_rotation_time is None else with_rotation_time
        )

        # Evict loggers that are no longer active before looking anything up.
        inactive_names = [
            cached_name
            for cached_name, cached in self._loggers.items()
            if not cached.is_active()
        ]
        for cached_name in inactive_names:
            self._loggers.pop(cached_name)

        existing = self._loggers.get(name)
        if existing is not None:
            return existing

        created = Logger(
            session_id=self._session_id,
            dataset_name=name,
            dataset_timestamp=dataset_timestamp,
            session_timestamp=session_time,
            writers=self.writers,
            tags=effective_tags,
            metadata=metadata,
            verbose=self.verbose,
            with_rotation_time=rotation,
            segments=segments,
            profile_full_dataset=profile_full_dataset,
            cache_size=cache_size,
            constraints=constraints,
        )
        self._loggers[name] = created
        return created