def evaluate(self, x, y=None, **kwargs):
    """Wrapped Keras ``evaluate()`` that records lineage and hashes inputs.

    Logs ``evaluate.batch_size`` as a lineage parameter, hashes the input
    data when it comes from an API resource (wrapping generators/Sequences
    so batches are hashed as they are consumed), then delegates to the
    underlying ``evaluate()`` and writes the resulting metrics.

    Raises:
        NotSupportedError: if ``x`` is a generator/Sequence and ``y`` is
            also given (the generator must yield both inputs and targets).
    """
    # Record the batch size for lineage; None when the caller did not set one.
    self._dws_state.lineage.add_param("evaluate.batch_size", kwargs.get("batch_size"))
    api_resource = self._dws_state.find_input_resources_and_return_if_api(x, y)
    if api_resource is not None:
        _verify_eager_if_dataset(x, y, api_resource)
        # Duplicate the hash state so the hash captured during fit() is
        # restored after pop_hash_state() below.
        api_resource.dup_hash_state()
        hash_state = api_resource.get_hash_state()
        if isinstance(x, kerasutils.Sequence):
            if y is not None:
                raise NotSupportedError(
                    "evaluate() method does not support a generator for x AND a y value"
                )
            x = _TfKerasSequenceWrapper(x, hash_state)
        elif isinstance(x, GeneratorType):
            if y is not None:
                raise NotSupportedError(
                    "evaluate() method does not support a generator for x AND a y value"
                )
            x = _wrap_generator(x, hash_state)
        else:  # x and y are provided as full arrays
            _add_to_hash(x, hash_state)
            if y is not None:
                _add_to_hash(y, hash_state)
    results = super().evaluate(x, y, **kwargs)
    assert len(results) == len(self.metrics_names)
    if api_resource is not None:
        api_resource.save_current_hash()
        api_resource.pop_hash_state()
    self._dws_state.write_metrics_and_complete(dict(zip(self.metrics_names, results)))
    return results
def test_pandas_df(self):
    """Feed a small pandas DataFrame into the hasher and print the digest."""
    frame = pandas.DataFrame(
        {
            'x1': [1, 2, 3, 4, 5],
            'x2': [1.5, 2.5, 3.5, 4.5, 5.5],
            'y': [1, 0, 0, 1, 1],
        }
    )
    _add_to_hash(frame, self.hash_state)
    print(self.hash_state.hexdigest())
def fit(self, x, y=None, **kwargs):
    """x, y can be arrays or x can be a generator.

    Records ``fit.epochs`` and ``fit.batch_size`` as lineage parameters,
    hashes the training data when it comes from an API resource (wrapping
    generators/Sequences so batches are hashed as they are consumed), adds
    the checkpoint callback if one was configured, and delegates to the
    underlying Keras ``fit()``.

    Raises:
        NotSupportedError: if ``x`` is a generator/Sequence and ``y`` is
            also given (the generator must yield both inputs and targets).
    """
    # Keras trains for a single epoch when epochs is not specified.
    self._dws_state.lineage.add_param("fit.epochs", kwargs.get("epochs", 1))
    self._dws_state.lineage.add_param("fit.batch_size", kwargs.get("batch_size"))
    api_resource = self._dws_state.find_input_resources_and_return_if_api(x, y)
    if api_resource is not None:
        _verify_eager_if_dataset(x, y, api_resource)
        api_resource.init_hash_state()
        hash_state = api_resource.get_hash_state()
        if isinstance(x, kerasutils.Sequence):
            if y is not None:
                raise NotSupportedError(
                    "fit() method does not support a generator for x AND a y value"
                )
            x = _TfKerasSequenceWrapper(x, hash_state)
        elif isinstance(x, GeneratorType):
            if y is not None:
                raise NotSupportedError(
                    "fit() method does not support a generator for x AND a y value"
                )
            x = _wrap_generator(x, hash_state)
        else:  # x and y are provided as full arrays
            _add_to_hash(x, hash_state)
            if y is not None:
                _add_to_hash(y, hash_state)
        api_resource.save_current_hash()  # in case we evaluate in a separate process
    if self.checkpoint_cb:
        # Append to the caller's callback list if one was passed, otherwise
        # start a new list containing only our checkpoint callback.
        kwargs.setdefault("callbacks", []).append(self.checkpoint_cb)
    return super().fit(x, y, **kwargs)
def fit(self, X, y, *args, **kwargs):
    """Train the underlying predictor on the input data (X) and labels (y).

    If the input resource is an api resource, the wrapper captures the
    hash of the inputs. If ``model_save_file`` was specified, it also
    saves the trained model.

    Returns whatever the wrapped predictor's ``fit()`` returns.
    """
    api_resource = self._dws_state.find_input_resources_and_return_if_api(X, y)
    if api_resource is not None:
        api_resource.init_hash_state()
        hash_state = api_resource.get_hash_state()
        _add_to_hash(X, hash_state)
        _add_to_hash(y, hash_state)
        # Persist the hash now, in case we evaluate in a separate process.
        api_resource.save_current_hash()
    result = self.predictor.fit(X, y, *args, **kwargs)
    if self.model_save_file is not None:
        self._save_model()
    return result
def score(self, X, y, sample_weight=None):
    """Make predictions from a trained model and score them according to
    the metrics specified when instantiating the wrapper.

    If the input resource is an api resource, the wrapper captures its
    hash. The wrapper runs the wrapped predictor's :meth:`~predict`
    method to generate predictions. A `metrics` object is instantiated
    to compute the metrics for the predictions and a ``results.json``
    file is written to the results resource. The lineage data is saved
    and finally the score is computed from the predictions and returned
    to the caller.
    """
    if self.score_has_been_run:
        # This might be from a saved model, so we reset the
        # execution time, etc.
        self._dws_state.reset_lineage()
    # Record every hyperparameter of the wrapped predictor for lineage.
    for (param, value) in self.predictor.get_params(deep=True).items():
        self._dws_state.lineage.add_param(param, value)
    api_resource = self._dws_state.find_input_resources_and_return_if_api(X, y)
    if api_resource is not None:
        # Duplicate so the hash captured during fit() survives the pop below.
        api_resource.dup_hash_state()
        hash_state = api_resource.get_hash_state()
        _add_to_hash(X, hash_state)
        if y is not None:
            _add_to_hash(y, hash_state)
        api_resource.save_current_hash()
        api_resource.pop_hash_state()
    predictions = self.predictor.predict(X)
    if isinstance(self.metrics, str):
        # Metrics given by name: look up the metrics class in the registry.
        metrics_inst = _METRICS[self.metrics](
            y, predictions, sample_weight=sample_weight)  # type: ignore
    else:
        metrics_inst = self.metrics(y, predictions, sample_weight=sample_weight)
    self._dws_state.write_metrics_and_complete(metrics_inst.to_dict())
    self.score_has_been_run = True
    return metrics_inst.score()
def __getitem__(self, idx):
    """Fetch batch *idx* from the wrapped Sequence, hashing its contents.

    The wrapped Sequence yields either ``(inputs, targets)`` or
    ``(inputs, targets, sample_weights)``; each component present is
    folded into the hash state, and the batch is returned unchanged.
    """
    batch = self.wrapped[idx]
    if len(batch) == 2:
        data, labels = batch
        weights = None
    else:
        data, labels, weights = batch
    _add_to_hash(data, self.hash_state)
    _add_to_hash(labels, self.hash_state)
    if weights is not None:
        _add_to_hash(weights, self.hash_state)
    return batch
def wrapper():
    """Generator that re-yields each batch after hashing its components.

    Each batch from the enclosing ``wrapped`` iterable is either
    ``(inputs, targets)`` or ``(inputs, targets, sample_weights)``;
    every component present is folded into ``hash_state`` before the
    batch is passed through unchanged.
    """
    for batch in wrapped:
        if len(batch) == 2:
            data, labels = batch
            weights = None
        else:
            data, labels, weights = batch
        _add_to_hash(data, hash_state)
        _add_to_hash(labels, hash_state)
        if weights is not None:
            _add_to_hash(weights, hash_state)
        yield batch
def test_tensorflow_tensor(self):
    """Hash each row-slice of a TF dataset and print the digest."""
    matrix = numpy.arange(100).reshape((10, 10))
    dataset = tensorflow.data.Dataset.from_tensor_slices(matrix)
    for tensor in dataset:
        _add_to_hash(tensor, self.hash_state)
    print(self.hash_state.hexdigest())
def test_numpy(self):
    """Hash a small numpy range array and print the digest."""
    values = numpy.arange(45)
    _add_to_hash(values, self.hash_state)
    print(self.hash_state.hexdigest())
def test_pandas_series(self):
    """Hash a small pandas Series and print the digest."""
    labels = pandas.Series(name='y', data=[1, 0, 0, 1, 1])
    _add_to_hash(labels, self.hash_state)
    print(self.hash_state.hexdigest())