Exemple #1
0
    def compare_with_pytorch(self, pytorch_model, input_shape_with_batch,
                             input_data_with_batch=None, compare_result=True,
                             rtol=1e-6, atol=1e-6):
        """
        Export a PyTorch model to ONNX, load it through OnnxLoader and compare the
        forward output of the loaded model against the PyTorch forward output.

        :param pytorch_model: the PyTorch module to export and run
        :param input_shape_with_batch: one input shape or a list of input shapes,
               each including the batch dimension
        :param input_data_with_batch: optional explicit input data (a single item or
               a list). When given, every item is converted to a 64-bit integer
               ndarray; when omitted, float32 data drawn with
               np.random.uniform(0, 1, shape) is generated for every input shape.
        :param compare_result: when True, assert that the two outputs are close and
               that the non-batch output shapes agree
        :param rtol: relative tolerance forwarded to assert_allclose
        :param atol: absolute tolerance forwarded to assert_allclose
        """
        input_shape_with_batch = to_list(input_shape_with_batch)
        if input_data_with_batch is not None:
            input_data_with_batch = to_list(input_data_with_batch)
        onnx_path = self.dump_pytorch_to_onnx(pytorch_model, input_shape_with_batch,
                                              input_data_with_batch)
        onnx_model = onnx.load(onnx_path)
        # TODO: we only consider single output for now
        if input_data_with_batch is None:
            input_data_with_batch = [np.random.uniform(0, 1, shape).astype(np.float32)
                                     for shape in input_shape_with_batch]
        else:
            # np.long was deprecated in NumPy 1.20 and removed in 1.24; use the
            # explicit 64-bit integer type, which is what torch.long maps to.
            input_data_with_batch = [np.array(data).astype(np.int64)
                                     for data in input_data_with_batch]

        pytorch_out = pytorch_model.forward(self._convert_ndarray_to_tensor(input_data_with_batch))
        zmodel = OnnxLoader(onnx_model.graph).to_keras()
        # A single input is passed bare rather than wrapped in a list.
        zoutput = zmodel.forward(
            input_data_with_batch[0] if len(input_data_with_batch) == 1 else input_data_with_batch)
        if compare_result:
            self.assert_allclose(pytorch_out.detach().numpy(), zoutput, rtol, atol)
            # Compare shapes without the leading batch dimension.
            assert tuple(pytorch_out.size()[1:]) == zmodel.get_output_shape()[1:]
Exemple #2
0
 def __init__(self,
              input,
              output,
              jvalue=None,
              bigdl_type="float",
              **kwargs):
     """
     Build the model from its input(s) and output(s).

     :param input: a single input or a list of inputs; normalised with to_list
     :param output: a single output or a list of outputs; normalised with to_list
     :param jvalue: forwarded unchanged to the parent constructor
     :param bigdl_type: forwarded unchanged to the parent constructor
     :param kwargs: extra keyword arguments forwarded to the parent constructor
     """
     inputs = to_list(input)
     outputs = to_list(output)
     parent = super(BModel, self)
     parent.__init__(jvalue, inputs, outputs, bigdl_type, **kwargs)
Exemple #3
0
 def __call__(self, x=None):
     """
     Connect upstream nodes to the current module.

     :param x: upstream module nodes. x is either a Node or list of Node;
               a falsy value is treated as an empty list.
     :return: a Variable built from the result of the "connectInputs" call
     """
     upstream = to_list(x if x else [])
     if isinstance(self, Lambda):
         # A Lambda is materialised into a concrete layer from the
         # output shapes of its inputs (batch dimension removed).
         shapes = [node.get_output_shape() for node in upstream]
         target = self.create(remove_batch(shapes))
     else:
         target = self
     jvalue = callBigDlFunc(self.bigdl_type, "connectInputs", target,
                            to_list(upstream))
     return Variable.from_jvalue(jvalue)
Exemple #4
0
 def __call__(self, x=None):
     """
     Make the given upstream nodes point to the current module.

     :param x: upstream module nodes. x is either a Node or list of Node.
     :return: node containing current module
     """
     inputs = to_list(x if x else [])

     def resolve_layer():
         # Only a Lambda needs to be turned into a concrete layer, using the
         # output shapes of the upstream nodes with the batch removed.
         if not isinstance(self, Lambda):
             return self
         output_shapes = []
         for upstream in inputs:
             output_shapes.append(upstream.get_output_shape())
         return self.create(remove_batch(output_shapes))

     connected = callBigDlFunc(self.bigdl_type, "connectInputs",
                               resolve_layer(), to_list(inputs))
     return Variable.from_jvalue(connected)
Exemple #5
0
    def set_validation(self, batch_size, X_val, Y_val, trigger, val_method=None):
        """
        Set up validation for this optimizer.

        :param batch_size: validation batch size
        :param X_val: features of validation dataset (one item or a list of items)
        :param Y_val: label of validation dataset
        :param trigger: validation interval
        :param val_method: the ValidationMethod to use,e.g. "Top1Accuracy", "Top5Accuracy", "Loss".
               Defaults to a single Top1Accuracy when omitted.
        """
        methods = [Top1Accuracy()] if val_method is None else val_method
        features = [JTensor.from_ndarray(feature) for feature in to_list(X_val)]
        labels = JTensor.from_ndarray(Y_val)
        callBigDlFunc(self.bigdl_type, "setValidation", self.value, batch_size,
                      trigger, features, labels, to_list(methods))
Exemple #6
0
    def set_validation(self, batch_size, X_val, Y_val, trigger, val_method=None):
        """
        Register the validation data and validation methods.

        :param batch_size: validation batch size
        :param X_val: features of validation dataset
        :param Y_val: label of validation dataset
        :param trigger: validation interval
        :param val_method: the ValidationMethod to use,e.g. "Top1Accuracy", "Top5Accuracy", "Loss"
        """
        # Fall back to Top1Accuracy when the caller gives no validation method.
        if val_method is None:
            val_method = [Top1Accuracy()]

        feature_tensors = []
        for feature in to_list(X_val):
            feature_tensors.append(JTensor.from_ndarray(feature))
        label_tensor = JTensor.from_ndarray(Y_val)

        call_args = (self.value, batch_size, trigger, feature_tensors,
                     label_tensor, to_list(val_method))
        callBigDlFunc(self.bigdl_type, "setValidation", *call_args)
 def __call__(self, x=None):
     """
     Attach upstream nodes to the current module and create the graph node.

     :param x: upstream module nodes. x is either a Node or list of Node.
     :return: node containing current module
     """
     upstream = to_list(x if x else [])
     if not isinstance(self, Lambda):
         module = self
     else:
         # Build the concrete layer of a Lambda from the output shapes of the
         # layers wrapped by the upstream nodes, with the batch removed.
         shapes = []
         for node in upstream:
             wrapped = ZooKerasLayer.of(node.element().value)
             shapes.append(wrapped.get_output_shape())
         module = self.create(remove_batch(shapes))
     created = callBigDlFunc(self.bigdl_type, "createNode", module,
                             to_list(upstream))
     return Node.of(created)
Exemple #8
0
 def __init__(self,
              X,
              Y,
              model,
              criterion,
              end_trigger,
              batch_size,
              optim_method=None,
              cores=None,
              bigdl_type="float"):
     """
     Create the optimizer from in-memory features and labels.

     :param X: features (one item or a list of items), converted with JTensor.from_ndarray
     :param Y: labels, converted with JTensor.from_ndarray
     :param model: the model to train; its name keys the optim method mapping
     :param criterion: the loss criterion
     :param end_trigger: when to end the optimization
     :param batch_size: training batch size
     :param optim_method: an OptimMethod, a JavaObject wrapping one, or a ready-made
            mapping that is passed through as is; SGD is used when falsy
     :param cores: number of cores; defaults to multiprocessing.cpu_count()
     :param bigdl_type: forwarded to the JavaValue constructor
     """
     if not optim_method:
         single_method = SGD()
     elif isinstance(optim_method, OptimMethod):
         single_method = optim_method
     elif isinstance(optim_method, JavaObject):
         single_method = OptimMethod(optim_method, bigdl_type)
     else:
         single_method = None

     # Anything that is not a single method is passed through unchanged.
     if single_method is None:
         optim_methods = optim_method
     else:
         optim_methods = {model.name(): single_method}

     worker_cores = multiprocessing.cpu_count() if cores is None else cores
     features = [JTensor.from_ndarray(feature) for feature in to_list(X)]
     labels = JTensor.from_ndarray(Y)
     JavaValue.__init__(self, None, bigdl_type, features, labels,
                        model.value, criterion, optim_methods, end_trigger,
                        batch_size, worker_cores)
Exemple #9
0
 def __init__(self,
              X,
              Y,
              model,
              criterion,
              end_trigger,
              batch_size,
              optim_method=None,
              cores=None,
              bigdl_type="float"):
     """
     Construct the optimizer from local feature and label arrays.

     :param X: features; a single item or a list of items
     :param Y: labels
     :param model: the model to optimize
     :param criterion: the loss criterion
     :param end_trigger: when to end the optimization
     :param batch_size: training batch size
     :param optim_method: optimization method. A falsy value selects SGD; an
            OptimMethod or JavaObject is keyed by the model name; any other
            value is used unchanged.
     :param cores: number of cores; when None the machine CPU count is used
     :param bigdl_type: forwarded to the JavaValue constructor
     """
     def keyed_by_model(method):
         # Wrap a single optimization method in a {model name: method} mapping.
         return {model.name(): method}

     if not optim_method:
         optim_methods = keyed_by_model(SGD())
     elif isinstance(optim_method, OptimMethod):
         optim_methods = keyed_by_model(optim_method)
     elif isinstance(optim_method, JavaObject):
         optim_methods = keyed_by_model(OptimMethod(optim_method, bigdl_type))
     else:
         optim_methods = optim_method

     if cores is None:
         cores = multiprocessing.cpu_count()

     feature_tensors = []
     for feature in to_list(X):
         feature_tensors.append(JTensor.from_ndarray(feature))

     JavaValue.__init__(self, None, bigdl_type, feature_tensors,
                        JTensor.from_ndarray(Y), model.value, criterion,
                        optim_methods, end_trigger, batch_size, cores)
Exemple #10
0
    def from_loss(cls, loss, optim_method, session=None, val_outputs=None,
                  val_labels=None, val_method=None, val_split=0.0,
                  clip_norm=None, clip_value=None, metrics=None,
                  tensor_with_value=None, session_config=None, model_dir=None, updates=None):
        """
        Create a TFOptimizer from a TensorFlow loss tensor.
        The loss tensor must come from a TensorFlow graph that only takes TFDataset.tensors and
        the tensors in `tensor_with_value` as inputs.
        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param session: the current TensorFlow Session, if you want to used a pre-trained model,
        you should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param val_outputs: the validation output TensorFlow tensor to be used by val_methods
        :param val_labels: the validation label TensorFlow tensor to be used by val_methods
        :param val_method: the BigDL val_method(s) to be used.
        :param val_split: Float between 0 and 1. Fraction of the training data to be used as
        validation data.
        :param clip_norm: float >= 0. Gradients will be clipped when their L2 norm exceeds
        this value.
        :param clip_value: either a positive float/int, which clips gradients to
        (-clip_value, clip_value), or a tuple which clips to (min_value, max_value).
        :param metrics: a dictionary. The key should be a string representing the metric's name
        and the value should be the corresponding TensorFlow tensor, which should be a scalar.
        :param tensor_with_value: a dictionary. The key is TensorFlow tensor, usually a
        placeholder, the value of the dictionary is a tuple of two elements. The first one of
        the tuple is the value to feed to the tensor in training phase and the second one
        is the value to feed to the tensor in validation phase.
        :param session_config: forwarded unchanged to TFOptimizer._from_grads
        :param model_dir: forwarded unchanged to TFOptimizer._from_grads
        :param updates: forwarded unchanged to TFOptimizer._from_grads
        :return: a TFOptimizer
        :raises ValueError: if clip_value is a non-positive number, or is neither a
        number nor a tuple
        """
        sess = TFOptimizer._get_or_create_session(session)
        grads, variables = TFOptimizer._get_vars_grads(loss)
        dataset = TFOptimizer._get_dataset_from_loss(loss)
        inputs = nest.flatten(dataset._original_tensors)

        if clip_value is not None:
            if isinstance(clip_value, (float, int)):
                if clip_value <= 0:
                    # The exception was previously constructed but never raised,
                    # so non-positive values slipped through silently.
                    raise ValueError("The clip_value argument should be positive number")
                # A scalar is expanded to a symmetric clipping range.
                clip_value = (-float(clip_value), float(clip_value))

            if not isinstance(clip_value, tuple):
                raise ValueError("The clip_value argument should be" +
                                 " a positive float/int which clips to" +
                                 " (-clip_value, clip_value); " +
                                 "or a tuple which clips to (min_value, max_value)")

        if val_method is not None:
            val_methods = to_list(val_method)
            if metrics is None:
                metrics = {}

            # Each validation method is registered as an extra metric entry.
            for i, method in enumerate(val_methods):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(method, val_outputs, val_labels)

        return TFOptimizer._from_grads(loss, sess, inputs, grads, variables, dataset, optim_method,
                                       val_split, clip_norm, clip_value,
                                       metrics, tensor_with_value, session_config,
                                       model_dir, updates)
Exemple #11
0
 def to_bigdl_metrics(metrics):
     """
     Convert metric name(s) into a list of BigDL validation methods.

     Only "accuracy" is converted (to Top1Accuracy); every other entry is
     handed to unsupport_exp.

     :param metrics: a single metric or a list of metrics
     :return: list of the converted BigDL validation methods
     """
     converted = []
     for name in to_list(metrics):
         if name != "accuracy":
             unsupport_exp(name)
             continue
         converted.append(boptimizer.Top1Accuracy())
     # TODO: add more metrics
     return converted
Exemple #12
0
    def set_validation(self, batch_size, val_rdd, trigger, val_method=None):
        """
        Set up validation for this optimizer.

        :param batch_size: validation batch size
        :param val_rdd: validation dataset
        :param trigger: validation interval
        :param val_method: the ValidationMethod to use,e.g. "Top1Accuracy", "Top5Accuracy", "Loss".
               Defaults to a single Top1Accuracy when omitted.
        """
        methods = [Top1Accuracy()] if val_method is None else val_method
        callBigDlFunc(self.bigdl_type, "setValidation", self.value, batch_size,
                      trigger, val_rdd, to_list(methods))
Exemple #13
0
    def set_validation(self, batch_size, val_rdd, trigger, val_method=None):
        """
        Register the validation data and validation methods.

        :param batch_size: validation batch size
        :param val_rdd: validation dataset; a DataSet instance is routed to
               "setValidationFromDataSet", anything else to "setValidation"
        :param trigger: validation interval
        :param val_method: the ValidationMethod to use,e.g. "Top1Accuracy", "Top5Accuracy", "Loss"
        """
        if val_method is None:
            val_method = [Top1Accuracy()]
        if isinstance(val_rdd, DataSet):
            target_func = "setValidationFromDataSet"
        else:
            target_func = "setValidation"
        callBigDlFunc(self.bigdl_type, target_func, self.value, batch_size,
                      trigger, val_rdd, to_list(val_method))
Exemple #14
0
 def __init__(self,
              X,
              y,
              model,
              criterion,
              end_trigger,
              batch_size,
              optim_method=None,
              cores=None,
              bigdl_type="float"):
     """
     Create the optimizer from in-memory features and labels.

     :param X: features; a single item or a list of items
     :param y: labels
     :param model: the model to optimize
     :param criterion: the loss criterion
     :param end_trigger: when to end the optimization
     :param batch_size: training batch size
     :param optim_method: the optimization method; SGD is used when falsy
     :param cores: number of cores; defaults to multiprocessing.cpu_count()
     :param bigdl_type: forwarded to the JavaValue constructor
     """
     worker_cores = multiprocessing.cpu_count() if cores is None else cores
     features = [JTensor.from_ndarray(feature) for feature in to_list(X)]
     labels = JTensor.from_ndarray(y)
     if optim_method:
         method = optim_method
     else:
         method = SGD()
     JavaValue.__init__(self, None, bigdl_type, features, labels,
                        model.value, criterion, method, end_trigger,
                        batch_size, worker_cores)
    def compare_with_pytorch(self, pytorch_model, input_shape_with_batch):
        """
        Export a PyTorch model to ONNX, load it through OnnxLoader and assert that
        its forward output is close to the PyTorch forward output on random input.

        :param pytorch_model: the PyTorch module to export and run
        :param input_shape_with_batch: one input shape or a list of input shapes,
               each including the batch dimension
        """
        shapes = to_list(input_shape_with_batch)
        exported_path = self.dump_pytorch_to_onnx(pytorch_model, shapes)
        onnx_model = onnx.load(exported_path)
        # TODO: we only consider single  output for now.
        random_inputs = []
        for shape in shapes:
            random_inputs.append(np.random.uniform(0, 1, shape).astype(np.float32))
        # coutput = caffe2.python.onnx.backend.run_model(onnx_model, input_data_with_batch)[0]

        torch_inputs = self._convert_ndarray_to_tensor(random_inputs)
        pytorch_out = pytorch_model.forward(torch_inputs)
        zmodel = OnnxLoader(onnx_model.graph).to_keras()
        # A single input is passed bare rather than wrapped in a list.
        if len(random_inputs) == 1:
            zinput = random_inputs[0]
        else:
            zinput = random_inputs
        zoutput = zmodel.forward(zinput)
        self.assert_allclose(pytorch_out.detach().numpy(), zoutput)
Exemple #16
0
 def __init__(self, input, output, jvalue=None, **kwargs):
     """
     Build the model from its input(s) and output(s).

     :param input: a single input or a list of inputs; normalised with to_list
     :param output: a single output or a list of outputs; normalised with to_list
     :param jvalue: forwarded unchanged to the parent constructor
     :param kwargs: extra keyword arguments forwarded to the parent constructor
     """
     inputs = to_list(input)
     outputs = to_list(output)
     super(Model, self).__init__(jvalue, inputs, outputs, **kwargs)
    def from_keras(cls, keras_model, dataset, optim_method=None,
                   session_config=None, model_dir=None):
        """
        Create a TFOptimizer from a tensorflow.keras model. The model must be compiled.
        :param keras_model: the tensorflow.keras model, which must be compiled.
        :param dataset: a TFDataset
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam.
        When None, the optimizer of the keras model is used. Either way the value is
        converted with to_bigdl_optim_method.
        :param session_config: forwarded to TFModel.create
        :param model_dir: forwarded to the TFOptimizer constructor
        :return: a TFOptimizer
        :raises ValueError: if an input shape does not match the dataset, a gradient is
        None, or the metrics configuration is not supported
        """
        import tensorflow.keras.backend as K

        model_inputs = keras_model.inputs
        if hasattr(keras_model, "targets"):
            model_targets = keras_model.targets
        else:
            model_targets = keras_model._targets

        flatten_inputs = nest.flatten(dataset.feature_tensors)
        assert len(model_inputs) == len(flatten_inputs), \
            ("the keras model and TFDataset should have the same number of tensors" +
             " keras model has {} inputs " +
             "while TFDataset has {} inputs").format(len(model_inputs),
                                                     len(flatten_inputs))
        for i, (model_input, dataset_input) in enumerate(zip(model_inputs, flatten_inputs)):
            if not TFOptimizer._shape_match(model_input.shape, dataset_input.shape):
                raise ValueError(("The {}th input in keras model {}"
                                  " does not match the TFDataset"
                                  " input {}").format(i, model_input, dataset_input))

        flatten_targets = nest.flatten(dataset.label_tensors)
        # Report the number of labels here; the message used to print the
        # number of inputs by mistake.
        assert len(model_targets) == len(flatten_targets), \
            ("the keras model and TFDataset should have the same number of tensors" +
             " keras model has {} targets " +
             "while TFDataset has {} labels").format(len(model_targets),
                                                     len(flatten_targets))
        # todo check targets shape, currently checking target shape will
        # cause too much false alarm.

        loss = keras_model.total_loss
        variables = keras_model._collected_trainable_weights
        # Sorted in place by name, so the gradients below follow the same order.
        variables.sort(key=lambda variable: variable.name)
        keras_optimizer = keras_model.optimizer

        grads = K.gradients(loss, variables)
        if None in grads:
            raise ValueError('An operation has `None` for gradient. '
                             'Please make sure that all of your ops have a '
                             'gradient defined (i.e. are differentiable). '
                             'Common ops without gradient: '
                             'K.argmax, K.round, K.eval.')
        clip_norm = None
        clip_value = None
        if hasattr(keras_optimizer, 'clipnorm'):
            clip_norm = keras_optimizer.clipnorm
        if hasattr(keras_optimizer, 'clipvalue'):
            clip_value = (-keras_optimizer.clipvalue, keras_optimizer.clipvalue)

        sess = K.get_session()
        if optim_method is None:
            optim_method = keras_optimizer
        optim_method = to_bigdl_optim_method(optim_method)

        # Validation metrics are only set up when the model has metrics and the
        # dataset carries validation data; the former inner "validation data is
        # None" check could never fire inside this branch and was removed.
        if keras_model.metrics and (dataset.get_validation_data() is not None):
            if isinstance(keras_model.metrics, dict):
                raise ValueError(
                    "different metrics for different outputs are not supported right now")

            if len(keras_model.outputs) > 1:
                if not all(name.endswith("loss") for name in keras_model.metrics_names):
                    raise ValueError("metrics (except loss) for multi-head model is not supported")
                bigdl_val_methods = [Loss()]
            else:
                bigdl_val_methods = \
                    [to_bigdl_metric(m, keras_model.loss) for m in keras_model.metrics_names]
            val_outputs = keras_model.outputs
            val_labels = model_targets
        else:
            val_outputs = None
            val_labels = None
            bigdl_val_methods = None

        # Learning-phase values: the first for training, the second for validation.
        tensor_with_value = {
            K.learning_phase(): [True, False]
        }

        updates = keras_model.updates

        metrics = None

        if bigdl_val_methods is not None:
            metrics = {}
            for i, method in enumerate(to_list(bigdl_val_methods)):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(method, val_outputs, val_labels)

        # NOTE(review): model_dir is not forwarded to TFModel.create (None is
        # passed); it only reaches the TFOptimizer constructor below. Kept as in
        # the original - confirm this is intentional.
        tf_model = TFModel.create(loss, sess, model_inputs, model_targets, keras_model.outputs,
                                  grads, variables, loss.graph,
                                  tensor_with_value, session_config, metrics,
                                  updates, model_dir=None)

        return cls(tf_model, optim_method, sess=sess, dataset=dataset,
                   clip_norm=clip_norm, clip_value=clip_value, model_dir=model_dir)
    def from_keras(cls,
                   keras_model,
                   dataset,
                   session_config=None,
                   model_dir=None,
                   metrics=None,
                   optimizer=None):
        """
        Create a TFOptimizer from a tensorflow.keras model. The model must be compiled.
        :param keras_model: the tensorflow.keras model, which must be compiled.
        :param dataset: a TFDataset
        :param session_config: forwarded to TFModel.create / from_train_op
        :param model_dir: forwarded to the TFOptimizer constructor / from_train_op
        :param metrics: optional dictionary of extra metrics. When validation methods
        are derived from the keras model, they are added to this dictionary in place.
        :param optimizer: optional optimization method. When given, a TFModel is built
        from the gradients and trained with this optimizer; otherwise the train op of
        the keras optimizer is used through from_train_op.
        :return: a TFOptimizer
        :raises ValueError: if an input shape does not match the dataset or the metrics
        configuration is not supported
        """
        import tensorflow.keras.backend as K

        model_inputs = keras_model.inputs

        if hasattr(keras_model, "targets"):
            model_targets = keras_model.targets
        else:
            model_targets = keras_model._targets

        # target can be None if loss is None
        model_targets = [target for target in model_targets if target is not None]

        # standarize feature, labels to support keras model
        if isinstance(dataset, TFNdarrayDataset):
            dataset = _standarize_feature_label_dataset(dataset, keras_model)

        flatten_inputs = nest.flatten(dataset.feature_tensors)
        assert len(model_inputs) == len(flatten_inputs), \
            ("the keras model and TFDataset should have the same number of tensors" +
             " keras model has {} inputs " +
             "while TFDataset has {} inputs").format(len(model_inputs),
                                                     len(flatten_inputs))
        for i, (model_input, dataset_input) in enumerate(zip(model_inputs, flatten_inputs)):
            if not TFOptimizer._shape_match(model_input.shape, dataset_input.shape):
                raise ValueError(("The {}th input in keras model {}"
                                  " does not match the TFDataset"
                                  " input {}").format(i, model_input, dataset_input))

        flatten_targets = nest.flatten(dataset.label_tensors)
        # Report the number of labels here; the message used to print the
        # number of inputs by mistake.
        assert len(model_targets) == len(flatten_targets), \
            ("the keras model and TFDataset should have the same number of tensors" +
             " keras model has {} targets " +
             "while TFDataset has {} labels").format(len(model_targets),
                                                     len(flatten_targets))
        # todo check targets shape, currently checking target shape will
        # cause too much false alarm.

        loss = keras_model.total_loss
        variables = keras_model._collected_trainable_weights
        # Sorted in place by name, so the gradients below follow the same order.
        variables.sort(key=lambda variable: variable.name)
        keras_optimizer = keras_model.optimizer

        from zoo.tfpark.zoo_optimizer import get_gradients_for_keras
        grads = get_gradients_for_keras(keras_optimizer, loss, variables)
        grads_and_vars = list(zip(grads, variables))
        import tensorflow.python.keras.optimizers as koptimizers
        if isinstance(keras_optimizer, koptimizers.TFOptimizer):
            # work around keras TFOptimzier bug
            train_op = keras_optimizer.optimizer.apply_gradients(
                grads_and_vars)
        else:
            train_op = keras_optimizer.apply_gradients(grads_and_vars)

        sess = K.get_session()

        # Validation metrics are only derived when the model has metrics and the
        # dataset carries validation data.
        if keras_model.metrics and (dataset.get_validation_data() is not None):
            if isinstance(keras_model.metrics, dict):
                raise ValueError(
                    "different metrics for different outputs are not supported right now"
                )

            if len(keras_model.outputs) > 1:
                if not all(name.endswith("loss")
                           for name in keras_model.metrics_names):
                    raise ValueError(
                        "metrics (except loss) for multi-head model is not supported"
                    )
                bigdl_val_methods = [Loss()]
            else:
                bigdl_val_methods = \
                    [to_bigdl_metric(m, keras_model.loss) for m in keras_model.metrics_names]
            val_outputs = keras_model.outputs
            val_labels = model_targets
        else:
            val_outputs = None
            val_labels = None
            bigdl_val_methods = None

        # Learning-phase values: the first for training, the second for validation.
        tensor_with_value = {K.learning_phase(): [True, False]}

        updates = []

        updates += keras_model.get_updates_for(None)
        # Conditional updates relevant to this model
        updates += keras_model.get_updates_for(keras_model.inputs)

        if bigdl_val_methods is not None:
            bigdl_metrics = {}
            for i, method in enumerate(to_list(bigdl_val_methods)):
                bigdl_metrics['bigdl_metric_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)
            if metrics is None:
                metrics = bigdl_metrics
            else:
                metrics.update(bigdl_metrics)

        if optimizer is not None:
            # Clipping settings are still read from the keras optimizer even
            # though training uses the explicitly supplied optimizer.
            clip_norm = None
            clip_value = None
            if hasattr(keras_optimizer, 'clipnorm'):
                clip_norm = keras_optimizer.clipnorm
            if hasattr(keras_optimizer, 'clipvalue'):
                clip_value = (-keras_optimizer.clipvalue,
                              keras_optimizer.clipvalue)
            # NOTE(review): model_dir is not forwarded to TFModel.create (None is
            # passed); it only reaches the constructor below. Kept as in the
            # original - confirm this is intentional.
            tf_model = TFModel.create(loss,
                                      sess,
                                      model_inputs,
                                      model_targets,
                                      keras_model.outputs,
                                      grads,
                                      variables,
                                      loss.graph,
                                      tensor_with_value,
                                      session_config,
                                      metrics,
                                      updates,
                                      model_dir=None)

            return cls(tf_model,
                       optimizer,
                       sess=sess,
                       dataset=dataset,
                       clip_norm=clip_norm,
                       clip_value=clip_value,
                       model_dir=model_dir)

        return cls.from_train_op(train_op,
                                 loss,
                                 inputs=model_inputs,
                                 labels=model_targets,
                                 metrics=metrics,
                                 updates=updates,
                                 sess=sess,
                                 dataset=dataset,
                                 tensor_with_value=tensor_with_value,
                                 session_config=session_config,
                                 model_dir=model_dir)
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 val_split=0.0,
                 tensors_with_value=None,
                 session_config=None,
                 clip_norm=None,
                 clip_value=None,
                 metrics=None,
                 updates=None,
                 freeze=False,
                 model_dir=None):
        """
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        Note that if grads and variables are not None, then they need to be sorted by name
        if you want to use multiple optimization methods for a TensorFlow model according to
        variable names.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param dataset: the training dataset. When None, dataset, inputs, grads, variables,
        graph and the validation arguments are all derived from the loss through
        TFOptimizer._get_arguments_from_loss.
        :param inputs: forwarded to the TFModel factory
        :param grads: forwarded to the TFModel factory
        :param variables: forwarded to the TFModel factory
        :param graph: stored on the instance and forwarded to the TFModel factory
        :param val_outputs: validation outputs used to build BigDLMetric entries
        :param val_labels: validation labels used to build BigDLMetric entries
        :param val_method: validation method(s); each one is added to `metrics` as a
        BigDLMetric
        :param val_split: when non-zero, the training data is randomly split into
        (1 - val_split, val_split) training/validation parts; otherwise the validation
        data comes from the dataset
        :param tensors_with_value: forwarded to the TFModel factory
        :param session_config: forwarded to the TFModel factory
        :param clip_norm: when truthy, L2-norm gradient clipping is enabled on the estimator
        :param clip_value: a tuple (min_value, max_value) for constant gradient clipping
        :param metrics: optional dictionary of metrics; modified in place when val_method
        is given
        :param updates: forwarded to the TFModel factory
        :param freeze: selects TFModel.create when True, TFModel.create_for_unfreeze otherwise
        :param model_dir: model directory; a temporary directory is created when None,
        and a missing directory is created
        :raises ValueError: if clip_value is not None and not a tuple, or if the dataset
        batch_size is not positive
        """

        # Without an explicit dataset, everything is recovered from the loss tensor.
        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[
                5:]

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.graph = graph

        self.clip_norm = clip_norm
        if clip_value is not None and not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be a tuple (min_value, max_value)"
            )
        self.clip_constant = clip_value

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        # Each validation method is registered as an extra metric entry.
        if val_method is not None:
            val_methods = to_list(val_method)
            if metrics is None:
                metrics = {}

            for i, method in enumerate(val_methods):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)

        # Fall back to a fresh temporary directory; otherwise make sure the
        # requested directory exists.
        if model_dir is None:
            model_dir = tempfile.mkdtemp()
        else:
            if not os.path.isdir(model_dir):
                os.makedirs(model_dir)

        self.model_dir = model_dir

        # `freeze` picks which TFModel factory builds the training model.
        if freeze:
            self.tf_model = TFModel.create(loss, sess, inputs, grads,
                                           variables, graph,
                                           tensors_with_value, session_config,
                                           metrics, updates, model_dir)
        else:
            self.tf_model = TFModel.create_for_unfreeze(
                loss, sess, inputs, grads, variables, graph,
                tensors_with_value, session_config, metrics, updates,
                model_dir)

        batch_size = self.dataset.batch_size

        sample_rdd = self.dataset.get_training_data()

        # A non-zero val_split carves the validation set out of the training
        # data; otherwise the dataset's own validation data is used.
        if val_split != 0.0:
            training_rdd, val_rdd = sample_rdd.randomSplit(
                [1 - val_split, val_split])
        else:
            training_rdd = sample_rdd
            val_rdd = self.dataset.get_validation_data()

        self.training_rdd = training_rdd
        self.val_rdd = val_rdd
        self.batch_size = batch_size

        self.estimator = Estimator(self.tf_model.training_helper_layer,
                                   self.optim_method, model_dir)

        # Gradient clipping is only configured when the corresponding value is truthy.
        if self.clip_norm:
            self.estimator.set_l2_norm_gradient_clipping(self.clip_norm)
        if self.clip_constant:
            min_value, max_value = self.clip_constant
            self.estimator.set_constant_gradient_clipping(min_value, max_value)
Exemple #20
0
    def __init__(self, loss, optim_method, sess=None, dataset=None, inputs=None,
                 grads=None, variables=None, graph=None,
                 val_outputs=None, val_labels=None, val_method=None, val_split=0.0,
                 tensors_with_value=None, session_config=None):
        '''
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        '''

        import tensorflow as tf
        from tensorflow.python.util import nest
        from zoo.util.tf import export_tf

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(loss, optim_method, sess,
                                                        val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[5:]

        additional_inputs = []
        additional_values = []
        all_required_inputs = _find_placeholders([loss])
        all_required_inputs_names = [v.name for v in all_required_inputs]
        if tensors_with_value:
            for t, v in tensors_with_value.items():
                if t.name in all_required_inputs_names:
                    additional_inputs.append(t)
                    additional_values.append(v)

        if not isinstance(inputs, list):
            inputs = nest.flatten(inputs)

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.inputs = inputs + additional_inputs
        self.graph = graph
        self.session_config = session_config

        from zoo.util.tf import process_grad
        grads = [process_grad(grad) for grad in grads]

        if self.dataset.batch_size <= 0:
            raise ValueError("You should set batch_size instead of batch_per_thread for training")

        if val_outputs is not None and val_labels is not None:
            with self.graph.as_default():
                val_labels = [tf.identity(v) for v in val_labels]
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        self.grads = grads
        self.outputs = outputs

        self.export_dir = tempfile.mkdtemp()
        export_tf(self.sess, self.export_dir,
                  inputs=self.inputs,
                  outputs=self.grads + self.outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        def to_floats(vs):
            return [float(v) for v in vs]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names,
            "default_tensor_values": [to_floats(v) for v in additional_values]
        }

        with open(os.path.join(self.export_dir, "training_meta.json"), "w") as f:
            f.write(json.dumps(meta))

        self.variable_placeholders = []
        with self.graph.as_default():
            assigns = []
            for v in variables:
                p = tf.placeholder(dtype=tf.float32, shape=v.shape)
                a = tf.assign(v, p)
                self.variable_placeholders.append(p)
                assigns.append(a)
            assign = tf.group(*assigns)
        self.assign = assign
        try:
            self.training_helper_layer = TFTrainingHelper(self.export_dir, session_config)
        except Py4JJavaError as e:
            if "expects to be colocated with unknown node" in str(e):
                raise Exception("""
If you are using the embedding layer in tf.keras, then this is a
known issue of TensorFlow, see https://github.com/tensorflow/tensorflow/issues/21889.
Please add zoo.util.tf.variable_creator_scope before model construction.
For example:
from zoo.util.tf import variable_creator_scope
with variable_creator_scope():
    model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(1, 1, input_length=1)])
                """)
            else:
                raise e

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size

        def to_sample(t):
            if isinstance(t, list):
                t = tuple(t)
            return Sample.from_ndarray(nest.flatten(t), [np.array([0.0])])

        sample_rdd = data.map(to_sample)
        if val_outputs is not None and val_labels is not None:
            if self.dataset.val_rdd is not None:
                val_rdd = self.dataset.val_rdd.map(to_sample)
                val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                              for m in to_list(val_method)]
                training_rdd = sample_rdd

            elif val_split != 0.0:
                training_rdd, val_rdd = sample_rdd.randomSplit([1 - val_split, val_split])
                val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                              for m in to_list(val_method)]
            else:
                raise ValueError("Validation data is not specified. Please set " +
                                 "val rdd in TFDataset, or set val_split larger than zero")

            self.optimizer = Optimizer.create(self.training_helper_layer,
                                              training_rdd,
                                              IdentityCriterion(),
                                              batch_size=batch_size,
                                              optim_method=self.optim_method)
            self.optimizer.set_validation(self.dataset.batch_size,
                                          val_rdd,
                                          EveryEpoch(),
                                          val_method)
        else:
            training_rdd = sample_rdd
            self.optimizer = Optimizer.create(self.training_helper_layer,
                                              training_rdd,
                                              IdentityCriterion(),
                                              batch_size=batch_size,
                                              optim_method=self.optim_method)
Exemple #21
0
    def from_keras(cls, keras_model, dataset, optim_method=None, val_split=0.0,
                   session_config=None, model_dir=None):
        """
        Create a TFOptimizer from a tensorflow.keras model. The model must be compiled.

        :param keras_model: the tensorflow.keras model, which must be compiled.
        :param dataset: a TFDataset
        :param optim_method: the optimization method to be used, such as
        bigdl.optim.optimizer.Adam. When None, the optimizer the keras model was
        compiled with is used. Either way the value goes through
        to_bigdl_optim_method.
        :param val_split: Float between 0 and 1. Fraction of the training data to be used as
        validation data.
        :param session_config: passed through unchanged to TFModel.create.
        :param model_dir: passed through unchanged to TFModel.create.
        :return: a new instance of ``cls`` built around the created TFModel.
        :raises ValueError: if any gradient of the loss is None, if validation is
        requested while ``keras_model.metrics`` is a dict, or if validation is
        requested for a multi-output model that has metrics other than losses.
        """
        import tensorflow.keras.backend as K

        model_inputs = keras_model.inputs
        # The target placeholders are read from `targets` when that attribute
        # exists and from the private `_targets` otherwise.
        if hasattr(keras_model, "targets"):
            model_targets = keras_model.targets
        else:
            model_targets = keras_model._targets

        inputs = model_inputs + model_targets

        loss = keras_model.total_loss
        # NOTE: this sorts the model's own weight list in place (by variable
        # name); the gradients below are computed in that same order.
        variables = keras_model._collected_trainable_weights
        variables.sort(key=lambda variable: variable.name)
        keras_optimizer = keras_model.optimizer

        grads = K.gradients(loss, variables)
        # Identity test instead of `None in grads`, which would fall back to
        # `==` on tensor objects.
        if any(grad is None for grad in grads):
            raise ValueError('An operation has `None` for gradient. '
                             'Please make sure that all of your ops have a '
                             'gradient defined (i.e. are differentiable). '
                             'Common ops without gradient: '
                             'K.argmax, K.round, K.eval.')

        # A clipping attribute may be absent or present-but-None; both mean
        # "no clipping". Negating a None clipvalue would raise TypeError.
        clip_norm = getattr(keras_optimizer, 'clipnorm', None)
        clip_bound = getattr(keras_optimizer, 'clipvalue', None)
        clip_value = None if clip_bound is None else (-clip_bound, clip_bound)

        sess = K.get_session()
        if optim_method is None:
            optim_method = keras_optimizer
        optim_method = to_bigdl_optim_method(optim_method)

        # Validation is only configured when the model declares metrics AND
        # there is something to validate on (explicit data or a split).
        if keras_model.metrics and (dataset.get_validation_data() is not None
                                    or val_split != 0.0):
            if isinstance(keras_model.metrics, dict):
                raise ValueError(
                    "different metrics for different outputs are not supported right now")

            if len(keras_model.outputs) > 1:
                if not all(name.endswith("loss") for name in keras_model.metrics_names):
                    raise ValueError("metrics (except loss) for multi-head model is not supported")
                bigdl_val_methods = [Loss()]
            else:
                bigdl_val_methods = [to_bigdl_metric(m, keras_model.loss)
                                     for m in keras_model.metrics_names]
            val_outputs = keras_model.outputs
            val_labels = model_targets
        else:
            val_outputs = None
            val_labels = None
            bigdl_val_methods = None

        # Candidate values for the learning-phase tensor, handed to
        # TFModel.create as-is.
        tensor_with_value = {
            K.learning_phase(): [True, False]
        }

        updates = keras_model.updates

        metrics = None
        if bigdl_val_methods is not None:
            # NOTE(review): the key prefix spelling ('metirc') is kept exactly
            # as it was - other code may look these keys up; confirm before
            # renaming.
            metrics = {}
            for i, method in enumerate(to_list(bigdl_val_methods)):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(method, val_outputs, val_labels)

        tf_model = TFModel.create(loss, sess, inputs, grads, variables, loss.graph,
                                  tensor_with_value, session_config, metrics,
                                  updates, model_dir)

        return cls(tf_model, optim_method, sess=sess, dataset=dataset, val_split=val_split,
                   clip_norm=clip_norm, clip_value=clip_value)
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 val_split=0.0,
                 tensors_with_value=None,
                 session_config=None,
                 clip_norm=None,
                 clip_value=None,
                 metrics=None):
        '''
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param dataset: the training dataset. When None, dataset, inputs, grads,
        variables, graph and the validation arguments are all obtained from
        TFOptimizer._get_arguments_from_loss.
        :param inputs: passed to TFModel.create.
        :param grads: passed to TFModel.create.
        :param variables: passed to TFModel.create.
        :param graph: passed to TFModel.create and stored on the instance.
        :param val_outputs: output tensors used to build the validation metrics.
        :param val_labels: label tensors used to build the validation metrics.
        :param val_method: one validation method or a list of them; each is wrapped
        in a BigDLMetric and added to the metrics passed to TFModel.create.
        :param val_split: when not 0.0, the training data is randomly split with
        weights [1 - val_split, val_split] and the second part is used for
        validation instead of the dataset's own validation data.
        :param tensors_with_value: passed to TFModel.create.
        :param session_config: passed to TFModel.create.
        :param clip_norm: when truthy, enables L2-norm gradient clipping with this value.
        :param clip_value: a tuple (min_value, max_value); when truthy, enables
        constant gradient clipping.
        :param metrics: optional dict of metrics passed to TFModel.create. It is
        copied before validation methods are added, so the caller's dict is
        left untouched.
        :raises ValueError: if clip_value is given but is not a tuple, or if the
        dataset's batch_size is not positive.
        '''

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            (loss, optim_method, sess, dataset, inputs,
             grads, variables, graph, val_outputs, val_labels, val_method) = args

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.graph = graph

        self.clip_norm = clip_norm
        if clip_value is not None and not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be a tuple (min_value, max_value)"
            )
        self.clip_constant = clip_value

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        if val_method is not None:
            # Copy first: adding entries to the dict the caller passed in would
            # leak this constructor's bookkeeping back to the caller.
            metrics = dict(metrics) if metrics is not None else {}
            for i, method in enumerate(to_list(val_method)):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)

        self.tf_model = TFModel.create(loss, sess, inputs, grads, variables,
                                       graph, tensors_with_value,
                                       session_config, metrics)

        batch_size = self.dataset.batch_size
        sample_rdd = self.dataset.get_training_data()

        if val_split != 0.0:
            training_rdd, val_rdd = sample_rdd.randomSplit(
                [1 - val_split, val_split])
        else:
            training_rdd = sample_rdd
            val_rdd = self.dataset.get_validation_data()

        # The optimizer is built the same way with or without validation;
        # validation is attached afterwards only when both the methods and the
        # data are available.
        self.optimizer = Optimizer.create(
            self.tf_model.training_helper_layer,
            training_rdd,
            IdentityCriterion(),
            batch_size=batch_size,
            optim_method=self.optim_method)

        if self.tf_model.val_methods is not None and val_rdd is not None:
            self.optimizer.set_validation(batch_size, val_rdd,
                                          EveryEpoch(),
                                          self.tf_model.val_methods)

        if self.clip_norm:
            self.optimizer.set_gradclip_l2norm(self.clip_norm)
        if self.clip_constant:
            min_value, max_value = self.clip_constant
            self.optimizer.set_gradclip_const(min_value, max_value)
Exemple #23
0
 def __init__(self, input, output, jvalue=None, **kwargs):
     """
     Initialise the model from its inputs and outputs.

     ``input`` and ``output`` are each passed through ``to_list`` and then
     handed to the parent constructor after ``jvalue``; any extra keyword
     arguments are forwarded unchanged.
     """
     model_inputs = to_list(input)
     model_outputs = to_list(output)
     super(Model, self).__init__(jvalue, model_inputs, model_outputs, **kwargs)
Exemple #24
0
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 add_sample_weights_num=0):
        '''
        TFOptimizer is used for distributed training of tensorflow
        on Spark/BigDL.

        :param loss: The loss tensor of the tensorflow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current tensorflow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param dataset: the training dataset. When None, dataset, inputs, grads,
        variables, graph and the validation arguments are all obtained from
        TFOptimizer._get_arguments_from_loss.
        :param inputs: input tensors of the exported graph.
        :param grads: gradient tensors, exported together with the outputs.
        :param variables: variables whose names are written to the training meta
        file and for which assign placeholders are created.
        :param graph: the graph in which the helper ops are created.
        :param val_outputs: output tensors used for validation.
        :param val_labels: label tensors used for validation.
        :param val_method: one validation method or a list of them.
        :param add_sample_weights_num: number of scalar 1.0 arrays appended to
        every record before it is converted to a Sample.
        :raises ValueError: if the dataset's batch_size is not positive, or if
        validation tensors are given but the dataset has no val_rdd.
        '''
        # The docstring must be the first statement to become __doc__; the
        # local imports therefore come after it.
        import tensorflow as tf
        from zoo.util.tf import export_tf

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            (loss, optim_method, sess, dataset, inputs,
             grads, variables, graph, val_outputs, val_labels, val_method) = args

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.inputs = inputs
        self.graph = graph

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        has_validation = val_outputs is not None and val_labels is not None

        if has_validation:
            # Labels are wrapped in tf.identity before being exported next to
            # the model outputs.
            with self.graph.as_default():
                val_labels = [tf.identity(v) for v in val_labels]
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        self.export_dir = tempfile.mkdtemp()
        export_tf(self.sess,
                  self.export_dir,
                  inputs=self.inputs,
                  outputs=grads + outputs)

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": [o.name for o in outputs],
            "variables": [v.name for v in variables],
            "grad_variables": [g.name for g in grads]
        }

        with open(os.path.join(self.export_dir, "training_meta.json"),
                  "w") as f:
            f.write(json.dumps(meta))

        # One float32 placeholder per variable plus a grouped assign op, so all
        # variables can be overwritten in a single run.
        self.variable_placeholders = []
        with self.graph.as_default():
            assigns = []
            for v in variables:
                p = tf.placeholder(dtype=tf.float32, shape=v.shape)
                self.variable_placeholders.append(p)
                assigns.append(tf.assign(v, p))
            assign = tf.group(*assigns)
        self.assign = assign

        self.training_helper_layer = TFTrainingHelper(self.export_dir)

        def to_sample(tensors):
            # Shared by training and validation data. Captures only the integer
            # count, not `self`. The [0.0] label looks like a placeholder,
            # since IdentityCriterion is used - TODO confirm.
            features = tensors + [np.array(1.0)] * add_sample_weights_num
            return Sample.from_ndarray(features, [np.array([0.0])])

        batch_size = self.dataset.batch_size
        sample_rdd = self.dataset.rdd.map(to_sample)

        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          sample_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)

        if has_validation:
            val_rdd = self.dataset.val_rdd
            if val_rdd is None:
                # Fail with a clear message instead of an AttributeError on
                # `None.map`.
                raise ValueError("Validation data is not specified. Please set "
                                 "val rdd in TFDataset")
            val_sample_rdd = val_rdd.map(to_sample)
            val_method = [
                TFValidationMethod(m, len(val_outputs), len(val_labels))
                for m in to_list(val_method)
            ]
            self.optimizer.set_validation(batch_size,
                                          val_sample_rdd, EveryEpoch(),
                                          val_method)