def predict(self, x, batch_per_thread=1, distributed=True, mini_batch=False):
    """
    Use a model to do prediction.
    """
    if isinstance(x, ImageSet):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, x, batch_per_thread)
        return ImageSet(results)
    if isinstance(x, TFImageDataset):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, x.get_prediction_data(),
                              x.batch_per_thread)
        return ImageSet(results)
    if isinstance(x, TFDataset):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, x.get_prediction_data())
        return results.map(lambda result: Layer.convert_output(result))
    if mini_batch:
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, x)
        return results.map(lambda result: Layer.convert_output(result))
    if distributed:
        # Distributed mode: wrap a numpy array into an RDD of Sample (with dummy
        # labels) or use the given RDD directly.
        if isinstance(x, np.ndarray):
            data_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]),
                                     getOrCreateSparkContext())
        elif isinstance(x, RDD):
            data_rdd = x
        else:
            raise TypeError("Unsupported prediction data type: %s" % type(x))
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, data_rdd, batch_per_thread)
        return results.map(lambda result: Layer.convert_output(result))
    else:
        # Local mode: feed the data through forward() in batches and concatenate
        # the per-batch outputs.
        start_idx = 0
        results = []
        while start_idx < len(x):
            end_idx = min(start_idx + batch_per_thread, len(x))
            results.append(self.forward(x[start_idx:end_idx]))
            start_idx += batch_per_thread
        return np.concatenate(results, axis=0)
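# Usage sketch (illustrative only: `net` stands for a loaded model exposing the
# predict method above; the input shape and batch sizes are made up for the example):
#
#   import numpy as np
#   x = np.random.rand(8, 3, 224, 224).astype("float32")
#
#   # Distributed mode: the numpy array is wrapped into an RDD of Sample and an
#   # RDD of per-sample predictions is returned.
#   pred_rdd = net.predict(x, batch_per_thread=4, distributed=True)
#   preds = pred_rdd.collect()
#
#   # Local mode: batches are pushed through forward() and concatenated.
#   local_preds = net.predict(x, batch_per_thread=4, distributed=False)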
def backward(self, input, target):
    """
    NB: It's for debug only, please use optimizer.optimize() in production.
    Performs a back-propagation step through the criterion, with respect to the given input.

    :param input: ndarray or list of ndarray
    :param target: ndarray or list of ndarray
    :return: ndarray
    """
    jinput, input_is_table = Layer.check_input(input)
    jtarget, target_is_table = Layer.check_input(target)
    output = callBigDlFunc(self.bigdl_type,
                           "criterionBackward",
                           self.value,
                           jinput,
                           input_is_table,
                           jtarget,
                           target_is_table)
    return Layer.convert_output(output)
def forward(self, input, target):
    """
    NB: It's for debug only, please use optimizer.optimize() in production.
    Takes an input object, and computes the corresponding loss of the criterion,
    compared with `target`.

    :param input: ndarray or list of ndarray
    :param target: ndarray or list of ndarray
    :return: value of loss
    """
    jinput, input_is_table = Layer.check_input(input)
    jtarget, target_is_table = Layer.check_input(target)
    output = callBigDlFunc(self.bigdl_type,
                           "criterionForward",
                           self.value,
                           jinput,
                           input_is_table,
                           jtarget,
                           target_is_table)
    return output
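# Debug-only sketch for the two criterion methods above (assumes a criterion such
# as MSECriterion from bigdl.dllib.nn.criterion has been constructed as `criterion`):
#
#   import numpy as np
#   pred = np.array([[0.1, 0.9]], dtype="float32")
#   target = np.array([[0.0, 1.0]], dtype="float32")
#
#   loss = criterion.forward(pred, target)    # scalar loss value
#   grad = criterion.backward(pred, target)   # gradient of the loss w.r.t. pred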
def optimize(self):
    """
    Do an optimization.
    """
    jmodel = callJavaFunc(self.value.optimize)
    from bigdl.dllib.nn.layer import Layer
    return Layer.of(jmodel)
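# Sketch (assumes `optimizer` has already been constructed and configured with a
# model, training data and an end trigger; optimize() runs the training loop and
# returns the trained model as a Layer):
#
#   trained_model = optimizer.optimize()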
def distributed_predict(self, inputs, sc):
    """
    Do prediction in a distributed fashion on an RDD of inputs.

    :param inputs: An RDD whose elements are ndarrays or lists of ndarrays.
    :param sc: The current SparkContext.
    """
    # If each element of the RDD is a list, the input is treated as a table.
    data_type = inputs.map(lambda x: x.__class__.__name__).first()
    input_is_table = False
    if data_type == "list":
        input_is_table = True
    jinputs = inputs.map(lambda x: Layer.check_input(x)[0])
    output = callZooFunc(self.bigdl_type,
                         "inferenceModelDistriPredict",
                         self.value, sc,
                         jinputs,
                         input_is_table)
    return output.map(lambda x: KerasNet.convert_output(x))
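# Sketch (assumes `model` is an inference model with a network already loaded,
# `sc` is the active SparkContext and `input_rdd` is an RDD of numpy arrays):
#
#   result_rdd = model.distributed_predict(input_rdd, sc)
#   outputs = result_rdd.collect()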
def predict(self, inputs):
    """
    Do prediction on inputs.

    :param inputs: A numpy array or a list of numpy arrays or JTensor or a list of JTensors.
    """
    jinputs, input_is_table = Layer.check_input(inputs)
    output = callZooFunc(self.bigdl_type,
                         "inferenceModelPredict",
                         self.value,
                         jinputs,
                         input_is_table)
    return KerasNet.convert_output(output)
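# Sketch (assumes `model` is an inference model that already has a network loaded;
# the input shapes are placeholders):
#
#   import numpy as np
#   single_out = model.predict(np.random.rand(1, 3, 224, 224).astype("float32"))
#   multi_out = model.predict([np.random.rand(1, 128).astype("float32"),
#                              np.random.rand(1, 128).astype("float32")])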
def predict(self, x, batch_per_thread=4, distributed=True):
    """
    Use a model to do prediction.

    # Arguments
    x: Prediction data. A Numpy array or RDD of Sample or ImageSet or TextSet.
    batch_per_thread: The default value is 4.
      When distributed is True, the total batch size is batch_per_thread * rdd.getNumPartitions.
      When distributed is False, the total batch size is batch_per_thread * numOfCores.
    distributed: Boolean. Whether to do prediction in distributed mode or local mode.
                 Default is True. In local mode, x must be a Numpy array.
    """
    if isinstance(x, ImageSet) or isinstance(x, TextSet):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, x, batch_per_thread)
        return ImageSet(results) if isinstance(x, ImageSet) else TextSet(results)
    if distributed:
        if isinstance(x, np.ndarray):
            data_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]))
        elif isinstance(x, RDD):
            data_rdd = x
        else:
            raise TypeError("Unsupported prediction data type: %s" % type(x))
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value, data_rdd, batch_per_thread)
        return results.map(lambda result: Layer.convert_output(result))
    else:
        if isinstance(x, np.ndarray) or isinstance(x, list):
            results = callZooFunc(self.bigdl_type, "zooPredict",
                                  self.value, self._to_jtensors(x), batch_per_thread)
            return [Layer.convert_output(result) for result in results]
        else:
            raise TypeError("Unsupported prediction data type: %s" % type(x))
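# Sketch of the two numpy input paths (names are illustrative; `model` stands for a
# model exposing the predict method above):
#
#   import numpy as np
#   x = np.random.rand(16, 10).astype("float32")
#
#   rdd_preds = model.predict(x, batch_per_thread=4, distributed=True).collect()
#   local_preds = model.predict(x, batch_per_thread=4, distributed=False)
#   # Passing an ImageSet or TextSet instead returns an ImageSet / TextSet of predictions.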
def _create_model(self, java_model):
    # Explicitly reset SamplePreprocessing even though java_model already has the
    # preprocessing, so that the Python NNClassifierModel also has sample_preprocessing.
    estPreprocessing = self.getSamplePreprocessing()
    model = Layer.from_jvalue(java_model.getModel(), bigdl_type=self.bigdl_type)
    classifierModel = NNClassifierModel(model=model, feature_preprocessing=None,
                                        jvalue=java_model, bigdl_type=self.bigdl_type) \
        .setSamplePreprocessing(ChainedPreprocessing([ToTuple(), estPreprocessing]))
    classifierModel.setFeaturesCol(self.getFeaturesCol()) \
        .setPredictionCol(self.getPredictionCol()) \
        .setBatchSize(java_model.getBatchSize())
    return classifierModel
def init_from_existing_model(path, weight_path=None, input_seq_len=-1.0,
                             hidden_drop=-1.0, attn_drop=-1.0,
                             output_all_block=True, bigdl_type="float"):
    """
    Load an existing BERT model (with weights).

    # Arguments
    path: The path for the pre-defined model.
          Local file system, HDFS and Amazon S3 are supported.
          HDFS path should be like 'hdfs://[host]:[port]/xxx'.
          Amazon S3 path should be like 's3a://bucket/xxx'.
    weight_path: The path for pre-trained weights if any. Default is None.
    """
    jlayer = callZooFunc(bigdl_type, "loadBERT", path, weight_path, input_seq_len,
                         hidden_drop, attn_drop, output_all_block)
    model = Layer(jvalue=jlayer, bigdl_type=bigdl_type)
    model.__class__ = BERT
    return model
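# Sketch (the path is a placeholder; assumes this function is exposed as a static
# constructor on the BERT class):
#
#   bert = BERT.init_from_existing_model("/path/to/bert_model", weight_path=None)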
def flattened_layers(self, include_container=False):
    jlayers = callZooFunc(self.bigdl_type, "getFlattenSubModules", self, include_container)
    layers = [Layer.of(jlayer) for jlayer in jlayers]
    return layers
def layers(self):
    jlayers = callZooFunc(self.bigdl_type, "getSubModules", self)
    layers = [Layer.of(jlayer) for jlayer in jlayers]
    return layers