def evaluate(self, x, y=None, **kwargs):
    """Evaluate the model, capturing lineage parameters and input hashes.

    Records the ``batch_size`` keyword as a lineage parameter, hashes the
    input data when it comes from an API resource, delegates to the wrapped
    Keras ``evaluate()``, and writes the resulting metrics to the lineage
    store before returning them.
    """
    # Record the batch size (None when the caller did not specify one).
    self._dws_state.lineage.add_param("evaluate.batch_size", kwargs.get("batch_size"))
    api_resource = self._dws_state.find_input_resources_and_return_if_api(x, y)
    if api_resource is not None:
        _verify_eager_if_dataset(x, y, api_resource)
        # Work on a copy of the hash state so evaluation does not disturb
        # the hash accumulated during training.
        api_resource.dup_hash_state()
        hash_state = api_resource.get_hash_state()
        if isinstance(x, kerasutils.Sequence):
            if y is not None:
                raise NotSupportedError(
                    "evaluate() method does not support a generator for x AND a y value"
                )
            x = _TfKerasSequenceWrapper(x, hash_state)
        elif isinstance(x, GeneratorType):
            if y is not None:
                raise NotSupportedError(
                    "evaluate() method does not support a generator for x AND a y value"
                )
            x = _wrap_generator(x, hash_state)
        else:
            # x (and optionally y) are concrete arrays; hash them directly.
            _add_to_hash(x, hash_state)
            if y is not None:
                _add_to_hash(y, hash_state)
    results = super().evaluate(x, y, **kwargs)
    assert len(results) == len(self.metrics_names)
    if api_resource is not None:
        api_resource.save_current_hash()
        api_resource.pop_hash_state()
    self._dws_state.write_metrics_and_complete(
        {n: v for (n, v) in zip(self.metrics_names, results)}
    )
    return results
 def test_pandas_df(self):
     """Hash a small pandas DataFrame and print the resulting digest."""
     columns = {
         'x1': [1, 2, 3, 4, 5],
         'x2': [1.5, 2.5, 3.5, 4.5, 5.5],
         'y': [1, 0, 0, 1, 1],
     }
     frame = pandas.DataFrame(columns)
     _add_to_hash(frame, self.hash_state)
     print(self.hash_state.hexdigest())
 def fit(self, x, y=None, **kwargs):
     """Train the model, capturing lineage parameters and input hashes.

     x, y can be arrays or x can be a generator. When the input data comes
     from an API resource, a hash of the inputs is captured for lineage.
     The checkpoint callback (if configured) is appended to the Keras
     callbacks before delegating to the wrapped ``fit()``.
     """
     # Keras trains for a single epoch when "epochs" is not given.
     self._dws_state.lineage.add_param("fit.epochs", kwargs.get("epochs", 1))
     self._dws_state.lineage.add_param("fit.batch_size", kwargs.get("batch_size"))
     api_resource = self._dws_state.find_input_resources_and_return_if_api(x, y)
     if api_resource is not None:
         _verify_eager_if_dataset(x, y, api_resource)
         # Start a fresh hash for this training run.
         api_resource.init_hash_state()
         hash_state = api_resource.get_hash_state()
         if isinstance(x, kerasutils.Sequence):
             if y is not None:
                 raise NotSupportedError(
                     "fit() method does not support a generator for x AND a y value"
                 )
             x = _TfKerasSequenceWrapper(x, hash_state)
         elif isinstance(x, GeneratorType):
             if y is not None:
                 raise NotSupportedError(
                     "fit() method does not support a generator for x AND a y value"
                 )
             x = _wrap_generator(x, hash_state)
         else:  # x and y are provided as full arrays
             _add_to_hash(x, hash_state)
             if y is not None:
                 _add_to_hash(y, hash_state)
             # Save now, in case we evaluate in a separate process.
             api_resource.save_current_hash()
     if self.checkpoint_cb:
         # Make sure our checkpoint callback is passed through to Keras.
         kwargs.setdefault("callbacks", []).append(self.checkpoint_cb)
     return super().fit(x, y, **kwargs)
Example #4
0
    def fit(self, X, y, *args, **kwargs):
        """Train the wrapped predictor on the input data (X) and labels (y).

        When the input resource is an api resource, this wrapper captures
        the hash of the inputs. If ``model_save_file`` was specified, the
        trained model is saved afterwards.
        """
        api_resource = self._dws_state.find_input_resources_and_return_if_api(X, y)
        if api_resource is not None:
            # Start a fresh hash covering both inputs, and persist it so a
            # later evaluation in a separate process can see it.
            api_resource.init_hash_state()
            state = api_resource.get_hash_state()
            for data in (X, y):
                _add_to_hash(data, state)
            api_resource.save_current_hash()
        fitted = self.predictor.fit(X, y, *args, **kwargs)
        if self.model_save_file is not None:
            self._save_model()
        return fitted
Example #5
0
    def score(self, X, y, sample_weight=None):
        """Predict on X with the trained model and score against y.

        If the input resource is an api resource, its hash is captured.
        The wrapped predictor's :meth:`~predict` method generates the
        predictions, a metrics object computes the metrics specified when
        the wrapper was instantiated, and a ``results.json`` file is
        written to the results resource. The lineage data is saved and the
        score computed from the predictions is returned to the caller.
        """
        if self.score_has_been_run:
            # Possibly re-scoring a saved model, so reset the execution
            # time, etc.
            self._dws_state.reset_lineage()
        # Record every hyperparameter of the underlying predictor.
        for param, value in self.predictor.get_params(deep=True).items():
            self._dws_state.lineage.add_param(param, value)
        api_resource = self._dws_state.find_input_resources_and_return_if_api(X, y)
        if api_resource is not None:
            # Work on a copy of the hash state so scoring does not clobber
            # the hash captured during training.
            api_resource.dup_hash_state()
            state = api_resource.get_hash_state()
            _add_to_hash(X, state)
            if y is not None:
                _add_to_hash(y, state)
            api_resource.save_current_hash()
            api_resource.pop_hash_state()
        predictions = self.predictor.predict(X)
        if isinstance(self.metrics, str):
            metrics_factory = _METRICS[self.metrics]  # type: ignore
        else:
            metrics_factory = self.metrics
        metrics_inst = metrics_factory(y, predictions, sample_weight=sample_weight)
        self._dws_state.write_metrics_and_complete(metrics_inst.to_dict())
        self.score_has_been_run = True
        return metrics_inst.score()
 def __getitem__(self, idx):
     """Fetch batch ``idx`` from the wrapped sequence, hashing its contents."""
     batch = self.wrapped.__getitem__(idx)
     if len(batch) == 2:
         inputs, targets = batch
         weights = None
     else:
         # Three-element batches also carry per-sample weights.
         inputs, targets, weights = batch
     _add_to_hash(inputs, self.hash_state)
     _add_to_hash(targets, self.hash_state)
     if weights is not None:
         _add_to_hash(weights, self.hash_state)
     return batch
 def wrapper():
     """Yield batches from the enclosing generator, hashing each one."""
     for batch in wrapped:
         if len(batch) == 2:
             inputs, targets = batch
             weights = None
         else:
             # Three-element batches also carry per-sample weights.
             inputs, targets, weights = batch
         _add_to_hash(inputs, hash_state)
         _add_to_hash(targets, hash_state)
         if weights is not None:
             _add_to_hash(weights, hash_state)
         yield batch
 def test_tensorflow_tensor(self):
     """Hash each slice of a TensorFlow dataset and print the digest."""
     values = numpy.arange(100).reshape((10, 10))
     dataset = tensorflow.data.Dataset.from_tensor_slices(values)
     for tensor in dataset:
         _add_to_hash(tensor, self.hash_state)
     print(self.hash_state.hexdigest())
 def test_numpy(self):
     """Hash a small numpy array and print the resulting digest."""
     values = numpy.arange(45)
     _add_to_hash(values, self.hash_state)
     print(self.hash_state.hexdigest())
 def test_pandas_series(self):
     """Hash a small pandas Series and print the resulting digest."""
     labels = pandas.Series([1, 0, 0, 1, 1], name='y')
     _add_to_hash(labels, self.hash_state)
     print(self.hash_state.hexdigest())