Example 1
    def predict(self, x, batch_per_thread=1, distributed=True, mini_batch=False):
        """
        Use a model to do prediction.
        """
        if isinstance(x, ImageSet):
            results = callZooFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  x,
                                  batch_per_thread)
            return ImageSet(results)

        if isinstance(x, TFImageDataset):
            results = callZooFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  x.get_prediction_data(),
                                  x.batch_per_thread)
            return ImageSet(results)

        if isinstance(x, TFDataset):
            results = callZooFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  x.get_prediction_data())
            return results.map(lambda result: Layer.convert_output(result))

        if mini_batch:
            results = callZooFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  x)
            return results.map(lambda result: Layer.convert_output(result))

        if distributed:
            if isinstance(x, np.ndarray):
                data_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]), getOrCreateSparkContext())
            elif isinstance(x, RDD):
                data_rdd = x
            else:
                raise TypeError("Unsupported prediction data type: %s" % type(x))
            results = callZooFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  data_rdd,
                                  batch_per_thread)
            return results.map(lambda result: Layer.convert_output(result))
        else:
            start_idx = 0
            results = []
            while start_idx < len(x):
                end_idx = min(start_idx + batch_per_thread, len(x))
                results.append(self.forward(x[start_idx:end_idx]))
                start_idx += batch_per_thread

            return np.concatenate(results, axis=0)
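A rough usage sketch for the predict method above; the model object `net`, the import and the input shape are placeholders rather than anything taken from the snippet itself.

import numpy as np

x = np.random.rand(32, 28, 28, 1).astype("float32")

# Local mode: x is sliced into chunks of batch_per_thread, each chunk goes
# through forward(), and the results are concatenated into one ndarray.
local_preds = net.predict(x, batch_per_thread=8, distributed=False)

# Distributed mode: x is wrapped into an RDD of Sample (with dummy labels)
# and the call returns an RDD of per-sample outputs.
dist_preds = net.predict(x, batch_per_thread=8, distributed=True)
print(dist_preds.first())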
Example 2
    def backward(self, input, target):
        """
        NB: This is for debugging only; use optimizer.optimize() in production.
        Performs a back-propagation step through the criterion with respect to the given input.

        :param input: ndarray or list of ndarray
        :param target: ndarray or list of ndarray
        :return: ndarray. The gradient of the loss with respect to the input.
        """
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callBigDlFunc(self.bigdl_type, "criterionBackward",
                               self.value, jinput, input_is_table, jtarget,
                               target_is_table)
        return Layer.convert_output(output)
Example 3
    def forward(self, input, target):
        """
        NB: This is for debugging only; use optimizer.optimize() in production.
        Takes an input object and computes the corresponding loss of the criterion
        compared with `target`.

        :param input: ndarray or list of ndarray
        :param target: ndarray or list of ndarray
        :return: value of loss
        """
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callBigDlFunc(self.bigdl_type, "criterionForward", self.value,
                               jinput, input_is_table, jtarget,
                               target_is_table)
        return output
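A debug-only sketch covering the two criterion methods above (Examples 2 and 3). MSECriterion and the bigdl.dllib import path are assumptions about the surrounding library, mirroring the namespace used in Example 4 below.

import numpy as np
from bigdl.dllib.nn.criterion import MSECriterion

criterion = MSECriterion()
pred = np.array([[0.1, 0.9]], dtype="float32")
target = np.array([[0.0, 1.0]], dtype="float32")

loss = criterion.forward(pred, target)    # scalar loss value
grad = criterion.backward(pred, target)   # gradient w.r.t. pred, same shape as pred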
Example 4
    def optimize(self):
        """
        Run the optimization and return the trained model as a Layer.
        """
        jmodel = callJavaFunc(self.value.optimize)
        from bigdl.dllib.nn.layer import Layer
        return Layer.of(jmodel)
Example 5
    def distributed_predict(self, inputs, sc):
        """
        Do prediction on an RDD of inputs.

        :param inputs: An RDD whose elements are ndarrays or lists of ndarrays.
        :param sc: The current SparkContext.
        """
        data_type = inputs.map(lambda x: x.__class__.__name__).first()
        input_is_table = False
        if data_type == "list":
            input_is_table = True
        jinputs = inputs.map(lambda x: Layer.check_input(x)[0])

        output = callZooFunc(self.bigdl_type, "inferenceModelDistriPredict",
                             self.value, sc, jinputs, input_is_table)
        return output.map(lambda x: KerasNet.convert_output(x))
Example 6
    def predict(self, inputs):
        """
        Do prediction on inputs.

        :param inputs: A numpy array or a list of numpy arrays or JTensor or a list of JTensors.
        """
        jinputs, input_is_table = Layer.check_input(inputs)
        output = callZooFunc(self.bigdl_type, "inferenceModelPredict",
                             self.value, jinputs, input_is_table)
        return KerasNet.convert_output(output)
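A sketch of calling this predict; it assumes `model` is an InferenceModel instance that has already been loaded elsewhere, and the input shapes are placeholders.

import numpy as np

# A single ndarray input.
out = model.predict(np.random.rand(1, 3, 224, 224).astype("float32"))

# Multiple inputs as a list of ndarrays, which the method passes on as a table.
out2 = model.predict([np.random.rand(1, 128).astype("float32"),
                      np.random.rand(1, 128).astype("float32")])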
Example 7
    def predict(self, x, batch_per_thread=4, distributed=True):
        """
        Use a model to do prediction.

        # Arguments
        x: Prediction data. A Numpy array or RDD of Sample or ImageSet.
        batch_per_thread:
          The default value is 4.
          When distributed is True, the total batch size is batch_per_thread * rdd.getNumPartitions.
          When distributed is False, the total batch size is batch_per_thread * numOfCores.
        distributed: Boolean. Whether to do prediction in distributed mode or local mode.
                     Default is True. In local mode, x must be a Numpy array.
        """
        if isinstance(x, ImageSet) or isinstance(x, TextSet):
            results = callZooFunc(self.bigdl_type, "zooPredict", self.value, x,
                                  batch_per_thread)
            return ImageSet(results) if isinstance(x, ImageSet) \
                else TextSet(results)
        if distributed:
            if isinstance(x, np.ndarray):
                data_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]))
            elif isinstance(x, RDD):
                data_rdd = x
            else:
                raise TypeError("Unsupported prediction data type: %s" %
                                type(x))
            results = callZooFunc(self.bigdl_type, "zooPredict", self.value,
                                  data_rdd, batch_per_thread)
            return results.map(lambda result: Layer.convert_output(result))
        else:
            if isinstance(x, np.ndarray) or isinstance(x, list):
                results = callZooFunc(self.bigdl_type,
                                      "zooPredict", self.value,
                                      self._to_jtensors(x), batch_per_thread)
                return [Layer.convert_output(result) for result in results]
            else:
                raise TypeError("Unsupported prediction data type: %s" %
                                type(x))
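For the ImageSet branch of the predict above, a sketch of typical usage; ImageSet.read, its import path and the image directory are assumptions, and `sc` stands for an existing SparkContext.

from zoo.feature.image import ImageSet

image_set = ImageSet.read("/tmp/images", sc)
predictions = model.predict(image_set, batch_per_thread=4)  # returns an ImageSet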
Example 8
    def _create_model(self, java_model):
        # explicitly reset SamplePreprocessing even though java_model already has the preprocessing,
        # so that the Python NNClassifierModel also has sample_preprocessing
        estPreprocessing = self.getSamplePreprocessing()
        model = Layer.from_jvalue(java_model.getModel(),
                                  bigdl_type=self.bigdl_type)
        classifierModel = NNClassifierModel(model=model, feature_preprocessing=None,
                                            jvalue=java_model, bigdl_type=self.bigdl_type) \
            .setSamplePreprocessing(ChainedPreprocessing([ToTuple(), estPreprocessing]))

        classifierModel.setFeaturesCol(self.getFeaturesCol()) \
            .setPredictionCol(self.getPredictionCol()) \
            .setBatchSize(java_model.getBatchSize())
        return classifierModel
Example 9
    def init_from_existing_model(path,
                                 weight_path=None,
                                 input_seq_len=-1.0,
                                 hidden_drop=-1.0,
                                 attn_drop=-1.0,
                                 output_all_block=True,
                                 bigdl_type="float"):
        """
        Load an existing BERT model (with weights).

        # Arguments
        path: The path for the pre-defined model.
              Local file system, HDFS and Amazon S3 are supported.
              HDFS path should be like 'hdfs://[host]:[port]/xxx'.
              Amazon S3 path should be like 's3a://bucket/xxx'.
        weight_path: The path for pre-trained weights if any. Default is None.
        input_seq_len, hidden_drop, attn_drop: Optional overrides for the saved
              configuration; the default of -1.0 keeps the values from the loaded model.
        output_all_block: Whether to output the result of every encoder block.
                          Default is True.
        """
        jlayer = callZooFunc(bigdl_type, "loadBERT", path, weight_path,
                             input_seq_len, hidden_drop, attn_drop,
                             output_all_block)

        model = Layer(jvalue=jlayer, bigdl_type=bigdl_type)
        model.__class__ = BERT
        return model
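Assuming init_from_existing_model is exposed as a static method on the BERT class (as the final lines of the snippet suggest), loading could look like the sketch below; the paths are placeholders.

bert = BERT.init_from_existing_model("/tmp/bert/bert.model",
                                     weight_path="/tmp/bert/bert.weights")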
Example 10
    def flattened_layers(self, include_container=False):
        """
        Return all sub-modules of this model, recursively flattened, as Layer objects.
        """
        jlayers = callZooFunc(self.bigdl_type, "getFlattenSubModules", self,
                              include_container)
        layers = [Layer.of(jlayer) for jlayer in jlayers]
        return layers
Example 11
    def layers(self):
        """
        Return the direct sub-modules of this model as Layer objects.
        """
        jlayers = callZooFunc(self.bigdl_type, "getSubModules", self)
        layers = [Layer.of(jlayer) for jlayer in jlayers]
        return layers