def __init__(self, columns, bins=10):
    """Build a profiler over *columns* that histograms values into *bins* buckets.

    Parameters
    ----------
    columns : list
        Columns to profile; forwarded unchanged to the base-class constructor.
    bins : int, default 10
        Number of histogram bins to use.

    Raises
    ------
    ImportError
        If numpy is not installed.

    """
    super(ContinuousHistogramProfiler, self).__init__(columns)
    self._bins = bins

    # numpy is an optional dependency — resolve it here and fail loudly if absent
    numpy_module = maybe_dependency("numpy")
    self._np = numpy_module
    if numpy_module is None:
        raise ImportError(
            "numpy is not installed; try `pip install numpy`")
def as_dataframe(self):
    """
    Returns this collection of Experiment Runs as a table.

    Returns
    -------
    :class:`pandas.DataFrame`
        One row per run (indexed by run ID); one column per hyperparameter
        (prefixed ``hpp.``) or metric (prefixed ``metric.``) seen in any run.

    Raises
    ------
    ImportError
        If pandas is not installed.

    """
    pd = importer.maybe_dependency("pandas")
    if pd is None:
        e = ImportError("pandas is not installed; try `pip install pandas`")
        six.raise_from(e, None)

    ids = []
    data = []
    for run in self:
        # namespace the keys so hyperparameters and metrics can't collide
        run_data = {}
        run_data.update({'hpp.'+k: v for k, v in run.get_hyperparameters().items()})
        run_data.update({'metric.'+k: v for k, v in run.get_metrics().items()})
        ids.append(run.id)
        data.append(run_data)

    # union of all keys across runs: not every run has every column
    columns = set()
    for run_data in data:
        columns.update(run_data)
    # sorted() takes any iterable directly — no need to materialize a list first
    return pd.DataFrame(data, index=ids, columns=sorted(columns))
def log_reference_data(self, X, Y, overwrite=False):
    """Log tabular reference data.

    Parameters
    ----------
    X : pd.DataFrame
        Reference data inputs.
    Y : pd.DataFrame
        Reference data outputs.
    overwrite : bool, default False
        Whether to allow overwriting existing reference data.

    Raises
    ------
    ImportError
        If pandas is not installed.
    TypeError
        If `X` or `Y` is neither a DataFrame nor a Series.

    """
    pd = importer.maybe_dependency("pandas")
    if pd is None:
        raise ImportError(
            "pandas is not installed; try `pip install pandas`")

    # accept Series as a convenience; promote to single-column DataFrames
    if isinstance(X, pd.Series):
        X = X.to_frame()
    if isinstance(Y, pd.Series):
        Y = Y.to_frame()

    if not isinstance(X, pd.DataFrame):
        raise TypeError("`X` must be a DataFrame, not {}".format(type(X)))
    if not isinstance(Y, pd.DataFrame):
        raise TypeError("`Y` must be a DataFrame, not {}".format(type(Y)))

    # assemble a single table, namespacing input vs output columns
    df = pd.DataFrame()
    for c in X.columns:
        df["input." + str(c)] = X[c]
    for c in Y.columns:
        df["output." + str(c)] = Y[c]
    df["source"] = "reference"
    df["model_version_id"] = self.id

    tempf = tempfile.NamedTemporaryFile(suffix=".csv", delete=False)
    # close the handle immediately: only the path is needed below, the open
    # handle would otherwise leak, and on Windows to_csv() cannot reopen a
    # path that is still held open here
    tempf.close()
    try:
        df.to_csv(tempf.name, encoding="utf-8", index=False)
        self.log_artifact("reference_data", tempf.name, overwrite=overwrite)
    finally:
        os.remove(tempf.name)
def get_artifact(self, key):
    """
    Gets the artifact with name `key` from this Model Version.

    If the artifact was originally logged as just a filesystem path, that path will be returned.
    Otherwise, bytes representing the artifact object will be returned.

    Parameters
    ----------
    key : str
        Name of the artifact.

    Returns
    -------
    str or object or bytes
        Path of the artifact, the artifact object, or a bytestream representing the
        artifact.

    """
    artifact = self._get_artifact(
        key, _CommonCommonService.ArtifactTypeEnum.BLOB)
    artifact_stream = six.BytesIO(artifact)

    # NOTE(security): torch.load() and pickle.load() execute arbitrary code
    # during deserialization; artifacts are assumed to come from a trusted
    # source (the user's own logged objects).
    torch = importer.maybe_dependency("torch")
    if torch is not None:
        try:
            obj = torch.load(artifact_stream)
        # `except Exception` (not bare `except`) so KeyboardInterrupt and
        # SystemExit still propagate
        except Exception:  # not something torch can deserialize
            artifact_stream.seek(0)  # rewind for the next deserializer
        else:
            artifact_stream.close()
            return obj

    try:
        obj = pickle.load(artifact_stream)
    except Exception:  # not something pickle can deserialize
        artifact_stream.seek(0)  # rewind so the caller gets the full stream
    else:
        artifact_stream.close()
        return obj

    # fall back to returning the raw bytestream
    return artifact_stream
# -*- coding: utf-8 -*- import pytest from verta import data_types from verta._internal_utils import importer if importer.maybe_dependency("scipy") is None: pytest.skip("scipy is not installed", allow_module_level=True) class TestConfusionMatrix: def test_confusion_matrix(self): attr = data_types.ConfusionMatrix( value=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], labels=["a", "b", "c"], ) d = { "type": "verta.confusionMatrix.v1", "confusionMatrix": { "labels": ["a", "b", "c"], "value": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], }, } assert attr._as_dict() == d assert attr == data_types._VertaDataType._from_dict(d) def test_confusion_matrix_numpy(self): np = pytest.importorskip("numpy") attr = data_types.ConfusionMatrix( value=np.arange(1, 10).reshape((3, 3)), labels=np.array(["a", "b", "c"]),