def evaluate(self, x, y=None, **kwargs):
    """Wrapped Keras ``evaluate()`` that records lineage and hashes inputs.

    Logs ``evaluate.batch_size`` as a lineage parameter, hashes the input
    data when it comes from an API resource (wrapping generators/Sequences
    so batches are hashed as they are consumed), then delegates to the
    underlying ``evaluate()`` and writes the resulting metrics.

    Raises:
        NotSupportedError: if ``x`` is a generator/Sequence and ``y`` is
            also given (the generator must yield both inputs and targets).
    """
    # Record the batch size for lineage; None when the caller did not set one.
    self._dws_state.lineage.add_param("evaluate.batch_size", kwargs.get("batch_size"))
    api_resource = self._dws_state.find_input_resources_and_return_if_api(x, y)
    if api_resource is not None:
        _verify_eager_if_dataset(x, y, api_resource)
        # Duplicate the hash state so the hash captured during fit() is
        # restored after pop_hash_state() below.
        api_resource.dup_hash_state()
        hash_state = api_resource.get_hash_state()
        if isinstance(x, kerasutils.Sequence):
            if y is not None:
                raise NotSupportedError(
                    "evaluate() method does not support a generator for x AND a y value"
                )
            x = _TfKerasSequenceWrapper(x, hash_state)
        elif isinstance(x, GeneratorType):
            if y is not None:
                raise NotSupportedError(
                    "evaluate() method does not support a generator for x AND a y value"
                )
            x = _wrap_generator(x, hash_state)
        else:  # x and y are provided as full arrays
            _add_to_hash(x, hash_state)
            if y is not None:
                _add_to_hash(y, hash_state)
    results = super().evaluate(x, y, **kwargs)
    assert len(results) == len(self.metrics_names)
    if api_resource is not None:
        api_resource.save_current_hash()
        api_resource.pop_hash_state()
    self._dws_state.write_metrics_and_complete(dict(zip(self.metrics_names, results)))
    return results
def test_pandas_df(self):
    """Feed a small pandas DataFrame into the hasher and print the digest."""
    frame = pandas.DataFrame(
        {
            'x1': [1, 2, 3, 4, 5],
            'x2': [1.5, 2.5, 3.5, 4.5, 5.5],
            'y': [1, 0, 0, 1, 1],
        }
    )
    _add_to_hash(frame, self.hash_state)
    print(self.hash_state.hexdigest())
def fit(self, x, y=None, **kwargs):
    """x, y can be arrays or x can be a generator.

    Records ``fit.epochs`` and ``fit.batch_size`` as lineage parameters,
    hashes the training data when it comes from an API resource (wrapping
    generators/Sequences so batches are hashed as they are consumed), adds
    the checkpoint callback if one was configured, and delegates to the
    underlying Keras ``fit()``.

    Raises:
        NotSupportedError: if ``x`` is a generator/Sequence and ``y`` is
            also given (the generator must yield both inputs and targets).
    """
    # Keras trains for a single epoch when epochs is not specified.
    self._dws_state.lineage.add_param("fit.epochs", kwargs.get("epochs", 1))
    self._dws_state.lineage.add_param("fit.batch_size", kwargs.get("batch_size"))
    api_resource = self._dws_state.find_input_resources_and_return_if_api(x, y)
    if api_resource is not None:
        _verify_eager_if_dataset(x, y, api_resource)
        api_resource.init_hash_state()
        hash_state = api_resource.get_hash_state()
        if isinstance(x, kerasutils.Sequence):
            if y is not None:
                raise NotSupportedError(
                    "fit() method does not support a generator for x AND a y value"
                )
            x = _TfKerasSequenceWrapper(x, hash_state)
        elif isinstance(x, GeneratorType):
            if y is not None:
                raise NotSupportedError(
                    "fit() method does not support a generator for x AND a y value"
                )
            x = _wrap_generator(x, hash_state)
        else:  # x and y are provided as full arrays
            _add_to_hash(x, hash_state)
            if y is not None:
                _add_to_hash(y, hash_state)
        api_resource.save_current_hash()  # in case we evaluate in a separate process
    if self.checkpoint_cb:
        # Append to the caller's callback list if one was passed, otherwise
        # start a new list containing only our checkpoint callback.
        kwargs.setdefault("callbacks", []).append(self.checkpoint_cb)
    return super().fit(x, y, **kwargs)
def fit(self, X, y, *args, **kwargs):
    """Train the underlying predictor on the input data (X) and labels (y).

    If the input resource is an api resource, the wrapper captures the
    hash of the inputs. If ``model_save_file`` was specified, it also
    saves the trained model.

    Returns whatever the wrapped predictor's ``fit()`` returns.
    """
    api_resource = self._dws_state.find_input_resources_and_return_if_api(X, y)
    if api_resource is not None:
        api_resource.init_hash_state()
        hash_state = api_resource.get_hash_state()
        _add_to_hash(X, hash_state)
        _add_to_hash(y, hash_state)
        # Persist the hash now, in case we evaluate in a separate process.
        api_resource.save_current_hash()
    result = self.predictor.fit(X, y, *args, **kwargs)
    if self.model_save_file is not None:
        self._save_model()
    return result
def score(self, X, y, sample_weight=None):
    """Make predictions from a trained model and score them according to
    the metrics specified when instantiating the wrapper.

    If the input resource is an api resource, the wrapper captures its
    hash. The wrapper runs the wrapped predictor's :meth:`~predict`
    method to generate predictions. A `metrics` object is instantiated
    to compute the metrics for the predictions and a ``results.json``
    file is written to the results resource. The lineage data is saved
    and finally the score is computed from the predictions and returned
    to the caller.
    """
    if self.score_has_been_run:
        # This might be from a saved model, so we reset the
        # execution time, etc.
        self._dws_state.reset_lineage()
    # Record every hyperparameter of the wrapped predictor for lineage.
    for (param, value) in self.predictor.get_params(deep=True).items():
        self._dws_state.lineage.add_param(param, value)
    api_resource = self._dws_state.find_input_resources_and_return_if_api(X, y)
    if api_resource is not None:
        # Duplicate so the hash captured during fit() survives the pop below.
        api_resource.dup_hash_state()
        hash_state = api_resource.get_hash_state()
        _add_to_hash(X, hash_state)
        if y is not None:
            _add_to_hash(y, hash_state)
        api_resource.save_current_hash()
        api_resource.pop_hash_state()
    predictions = self.predictor.predict(X)
    if isinstance(self.metrics, str):
        # Metrics given by name: look up the metrics class in the registry.
        metrics_inst = _METRICS[self.metrics](
            y, predictions, sample_weight=sample_weight)  # type: ignore
    else:
        metrics_inst = self.metrics(y, predictions, sample_weight=sample_weight)
    self._dws_state.write_metrics_and_complete(metrics_inst.to_dict())
    self.score_has_been_run = True
    return metrics_inst.score()
def __getitem__(self, idx):
    """Fetch batch *idx* from the wrapped Sequence, hashing its contents.

    The wrapped Sequence yields either ``(inputs, targets)`` or
    ``(inputs, targets, sample_weights)``; each component present is
    folded into the hash state, and the batch is returned unchanged.
    """
    batch = self.wrapped[idx]
    if len(batch) == 2:
        data, labels = batch
        weights = None
    else:
        data, labels, weights = batch
    _add_to_hash(data, self.hash_state)
    _add_to_hash(labels, self.hash_state)
    if weights is not None:
        _add_to_hash(weights, self.hash_state)
    return batch
def wrapper():
    """Generator that re-yields each batch after hashing its components.

    Each batch from the enclosing ``wrapped`` iterable is either
    ``(inputs, targets)`` or ``(inputs, targets, sample_weights)``;
    every component present is folded into ``hash_state`` before the
    batch is passed through unchanged.
    """
    for batch in wrapped:
        if len(batch) == 2:
            data, labels = batch
            weights = None
        else:
            data, labels, weights = batch
        _add_to_hash(data, hash_state)
        _add_to_hash(labels, hash_state)
        if weights is not None:
            _add_to_hash(weights, hash_state)
        yield batch
def test_tensorflow_tensor(self):
    """Hash each row-slice of a TF dataset and print the digest."""
    matrix = numpy.arange(100).reshape((10, 10))
    dataset = tensorflow.data.Dataset.from_tensor_slices(matrix)
    for tensor in dataset:
        _add_to_hash(tensor, self.hash_state)
    print(self.hash_state.hexdigest())
def test_numpy(self):
    """Hash a small numpy range array and print the digest."""
    values = numpy.arange(45)
    _add_to_hash(values, self.hash_state)
    print(self.hash_state.hexdigest())
def test_pandas_series(self):
    """Hash a small pandas Series and print the digest."""
    labels = pandas.Series(name='y', data=[1, 0, 0, 1, 1])
    _add_to_hash(labels, self.hash_state)
    print(self.hash_state.hexdigest())