Example #1
0
 def __init__(self, columns, bins=10):
     """Initialize the profiler.

     Parameters
     ----------
     columns
         Forwarded unchanged to the parent class initializer.
     bins : default 10
         Stored on the instance as ``_bins``; presumably the histogram
         bin setting (confirm against the methods that read it).

     Raises
     ------
     ImportError
         If ``maybe_dependency("numpy")`` returns None.

     """
     super(ContinuousHistogramProfiler, self).__init__(columns)
     self._bins = bins

     # Keep a handle to numpy on the instance; it is resolved lazily so
     # that numpy stays an optional dependency of the package.
     numpy_module = maybe_dependency("numpy")
     self._np = numpy_module
     if numpy_module is None:
         message = "numpy is not installed; try `pip install numpy`"
         raise ImportError(message)
Example #2
0
    def as_dataframe(self):
        """
        Returns this collection of Experiment Runs as a table.

        Each row is one run, indexed by the run's ID. Hyperparameters appear
        in columns prefixed with ``hpp.`` and metrics in columns prefixed with
        ``metric.``; the columns are the sorted union of the keys found across
        all runs.

        Returns
        -------
        :class:`pandas.DataFrame`

        Raises
        ------
        ImportError
            If pandas is not installed.

        """
        pd = importer.maybe_dependency("pandas")
        if pd is None:
            e = ImportError("pandas is not installed; try `pip install pandas`")
            six.raise_from(e, None)

        run_ids = []
        rows = []
        column_names = set()
        for run in self:
            row = {}
            for name, value in run.get_hyperparameters().items():
                row['hpp.' + name] = value
            for name, value in run.get_metrics().items():
                row['metric.' + name] = value

            run_ids.append(run.id)
            rows.append(row)
            column_names.update(row)

        ordered_columns = sorted(column_names)
        return pd.DataFrame(rows, index=run_ids, columns=ordered_columns)
Example #3
0
    def log_reference_data(self, X, Y, overwrite=False):
        """Log tabular reference data.

        The inputs and outputs are combined into a single table whose columns
        are named ``input.<column>`` and ``output.<column>``, plus a ``source``
        column (always ``"reference"``) and a ``model_version_id`` column
        (this object's ``id``). The table is written to a temporary CSV file
        and logged as the artifact ``"reference_data"``; the temporary file is
        removed afterwards regardless of whether logging succeeded.

        Parameters
        ----------
        X : pd.DataFrame
            Reference data inputs. A ``pd.Series`` is converted to a
            single-column DataFrame.
        Y : pd.DataFrame
            Reference data outputs. A ``pd.Series`` is converted to a
            single-column DataFrame.
        overwrite : bool, default False
            Whether to allow overwriting existing reference data.

        Raises
        ------
        ImportError
            If pandas is not installed.
        TypeError
            If `X` or `Y` is neither a DataFrame nor a Series.

        """
        pd = importer.maybe_dependency("pandas")
        if pd is None:
            raise ImportError(
                "pandas is not installed; try `pip install pandas`")

        if isinstance(X, pd.Series):
            X = X.to_frame()
        if isinstance(Y, pd.Series):
            Y = Y.to_frame()
        if not isinstance(X, pd.DataFrame):
            raise TypeError("`X` must be a DataFrame, not {}".format(type(X)))
        if not isinstance(Y, pd.DataFrame):
            raise TypeError("`Y` must be a DataFrame, not {}".format(type(Y)))

        # NOTE(review): columns are assigned one at a time, so pandas aligns
        # each of them on the index established by the first assignment; X and
        # Y with differing indexes would produce missing values. Confirm that
        # callers always pass matching indexes.
        df = pd.DataFrame()
        for c in X.columns:
            df["input." + str(c)] = X[c]
        for c in Y.columns:
            df["output." + str(c)] = Y[c]
        df["source"] = "reference"
        df["model_version_id"] = self.id

        tempf = tempfile.NamedTemporaryFile(suffix=".csv", delete=False)
        # Only the reserved path is needed. Close the handle immediately so it
        # is not leaked, and so the file can be reopened by name and later
        # removed on platforms (e.g. Windows) that forbid this while a handle
        # is still open.
        tempf.close()
        try:
            df.to_csv(tempf.name, encoding="utf-8", index=False)
            self.log_artifact("reference_data",
                              tempf.name,
                              overwrite=overwrite)
        finally:
            os.remove(tempf.name)
Example #4
0
    def get_artifact(self, key):
        """
        Gets the artifact with name `key` from this Model Version.

        The artifact's bytes are deserialized when possible: first with
        ``torch.load`` (only if torch is installed), then with ``pickle.load``.
        If neither succeeds, a bytestream over the raw artifact bytes is
        returned instead.

        .. warning::
            Both pickle and torch deserialization can execute arbitrary code.
            Only retrieve artifacts that come from a trusted source.

        Parameters
        ----------
        key : str
            Name of the artifact.

        Returns
        -------
        object or file-like
            The deserialized artifact object, or a bytestream (positioned at
            its start) representing the artifact.

        """
        artifact = self._get_artifact(
            key, _CommonCommonService.ArtifactTypeEnum.BLOB)
        artifact_stream = six.BytesIO(artifact)

        torch = importer.maybe_dependency("torch")
        if torch is not None:
            try:
                obj = torch.load(artifact_stream)
            # `Exception` rather than a bare `except` so that
            # KeyboardInterrupt/SystemExit are not silently swallowed
            except Exception:  # not something torch can deserialize
                artifact_stream.seek(0)  # rewind for the next attempt
            else:
                artifact_stream.close()
                return obj

        try:
            obj = pickle.load(artifact_stream)
        except Exception:  # not something pickle can deserialize
            artifact_stream.seek(0)  # hand back the stream from its start
        else:
            artifact_stream.close()
            return obj

        return artifact_stream
Example #5
0
# -*- coding: utf-8 -*-

import pytest
from verta import data_types
from verta._internal_utils import importer

# Skip every test in this module when `maybe_dependency` cannot provide scipy
# (i.e. it returns None). NOTE(review): presumably `data_types` relies on scipy
# for the types tested here -- confirm.
if importer.maybe_dependency("scipy") is None:
    pytest.skip("scipy is not installed", allow_module_level=True)


class TestConfusionMatrix:
    def test_confusion_matrix(self):
        attr = data_types.ConfusionMatrix(
            value=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            labels=["a", "b", "c"],
        )
        d = {
            "type": "verta.confusionMatrix.v1",
            "confusionMatrix": {
                "labels": ["a", "b", "c"],
                "value": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            },
        }
        assert attr._as_dict() == d
        assert attr == data_types._VertaDataType._from_dict(d)

    def test_confusion_matrix_numpy(self):
        np = pytest.importorskip("numpy")
        attr = data_types.ConfusionMatrix(
            value=np.arange(1, 10).reshape((3, 3)),
            labels=np.array(["a", "b", "c"]),