def infer(self, input, start_sign, max_seq_len=30, stop_sign=None, build_output=None):
    """
    Inference API for given input

    # Arguments
    input: a sequence of data feed into encoder, eg: batch x seqLen x featureSize
    start_sign: a ndarray which represents start and is fed into decoder
    max_seq_len: max sequence length for final output
    stop_sign: a ndarray that indicates model should stop infer further if current output
               is the same with stopSign. None (the default) means no stop sign is passed.
    build_output: Feeding model output to buildOutput to generate final result

    # Returns
    Whatever the backend "seq2seqInfer" call returns.

    # Raises
    AssertionError: if input, start_sign or stop_sign is recognised as a table
                    by Layer.check_input.
    """
    jinput, input_is_table = Layer.check_input(input)
    assert not input_is_table
    jstart_sign, start_sign_is_table = Layer.check_input(start_sign)
    assert not start_sign_is_table

    # Compare with None explicitly: truth-testing a multi-element ndarray
    # raises ValueError, and an all-zero single-element array would be
    # silently treated as "no stop sign".
    if stop_sign is not None:
        jstop_sign, stop_sign_is_table = Layer.check_input(stop_sign)
        # Validate the stop sign itself (the start sign was already checked above).
        assert not stop_sign_is_table
    else:
        jstop_sign = None

    # Only the first converted element of each argument is handed to the backend.
    results = callBigDlFunc(self.bigdl_type, "seq2seqInfer",
                            self.value,
                            jinput[0],
                            jstart_sign[0],
                            max_seq_len,
                            jstop_sign[0] if jstop_sign else None,
                            build_output)
    return results
def predict(self, x, batch_per_thread=1, distributed=True):
    """
    Use a model to do prediction.

    # Arguments
    x: Prediction data. An ImageSet is always forwarded as-is. Otherwise a
       Numpy array or RDD when distributed is truthy, or a Numpy array or
       list when it is not.
    batch_per_thread: Forwarded unchanged to the backend "zooPredict" call.
    distributed: Selects the RDD-based path (truthy) or the local path (falsy).

    # Returns
    An ImageSet for ImageSet input; the mapped backend result for the
    distributed path; a list of converted outputs for the local path.

    # Raises
    TypeError: if x is not one of the types accepted by the chosen path.
    """
    # ImageSet input bypasses the distributed/local distinction entirely.
    if isinstance(x, ImageSet):
        predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  x,
                                  batch_per_thread)
        return ImageSet(predicted)

    if not distributed:
        # Local path: only arrays and lists can be turned into JTensors.
        if not isinstance(x, (np.ndarray, list)):
            raise TypeError("Unsupported prediction data type: %s" % type(x))
        predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  self._to_jtensors(x),
                                  batch_per_thread)
        return [Layer.convert_output(item) for item in predicted]

    # Distributed path: arrays are wrapped into a sample RDD with zero labels.
    if isinstance(x, np.ndarray):
        samples = to_sample_rdd(x, np.zeros([x.shape[0]]), getOrCreateSparkContext())
    elif isinstance(x, RDD):
        samples = x
    else:
        raise TypeError("Unsupported prediction data type: %s" % type(x))
    predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              samples,
                              batch_per_thread)
    return predicted.map(lambda item: Layer.convert_output(item))
def predict(self, x, batch_per_thread=1, distributed=True, mini_batch=False):
    """
    Use a model to do prediction.

    # Arguments
    x: Prediction data. Dispatch happens in this order:
       ImageSet, TFImageDataset, MapDataset (rejected), TFDataset, then the
       mini_batch flag, then the distributed flag.
    batch_per_thread: Batch size forwarded to "zooPredict" for ImageSet and
       distributed input, and used as the slice size in local mode. It is
       ignored for TFImageDataset (which supplies its own batch_per_thread),
       for TFDataset and for mini_batch input.
    distributed: When truthy, x must be a Numpy array or an RDD. When falsy,
       x is sliced locally and each slice is run through self.forward.
    mini_batch: When truthy, x is passed to "zooPredict" unchanged.

    # Returns
    An ImageSet for ImageSet/TFImageDataset input; the mapped backend result
    for TFDataset, mini_batch and distributed input; the concatenation
    (axis 0) of the per-slice forward outputs in local mode.

    # Raises
    ValueError: if x is a MapDataset, or if batch_per_thread is smaller
                than 1 in local mode.
    TypeError: if x is neither a Numpy array nor an RDD in distributed mode.
    """
    if isinstance(x, ImageSet):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              x,
                              batch_per_thread)
        return ImageSet(results)

    # NOTE(review): TFImageDataset is tested before TFDataset; presumably it is
    # a subclass and must win the dispatch - confirm against its definition.
    if isinstance(x, TFImageDataset):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              x.get_prediction_data(),
                              x.batch_per_thread)
        return ImageSet(results)

    if isinstance(x, MapDataset):
        raise ValueError("MapDataset is not supported in TFNet")

    if isinstance(x, TFDataset):
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              x.get_prediction_data())
        return results.map(lambda result: Layer.convert_output(result))

    if mini_batch:
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              x)
        return results.map(lambda result: Layer.convert_output(result))

    if distributed:
        if isinstance(x, np.ndarray):
            data_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]), getOrCreateSparkContext())
        elif isinstance(x, RDD):
            data_rdd = x
        else:
            raise TypeError("Unsupported prediction data type: %s" % type(x))
        results = callZooFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              data_rdd,
                              batch_per_thread)
        return results.map(lambda result: Layer.convert_output(result))

    # Local mode. A step below 1 would never advance the slice window (the
    # previous while-loop spun forever), so reject it up front.
    if batch_per_thread < 1:
        raise ValueError("batch_per_thread must be at least 1 for local prediction, "
                         "but got %s" % batch_per_thread)
    total = len(x)
    results = [self.forward(x[start_idx:min(start_idx + batch_per_thread, total)])
               for start_idx in range(0, total, batch_per_thread)]
    return np.concatenate(results, axis=0)
def backward(self, input, target):
    """
    NB: It's for debug only, please use optimizer.optimize() in production.
    Run one back-propagation step through the criterion for the given
    input/target pair.

    :param input: ndarray or list of ndarray
    :param target: ndarray or list of ndarray
    :return: ndarray
    """
    # check_input yields the converted value plus a flag telling the backend
    # whether the value is a table.
    converted_input, input_as_table = Layer.check_input(input)
    converted_target, target_as_table = Layer.check_input(target)
    grad = callBigDlFunc(self.bigdl_type,
                         "criterionBackward",
                         self.value,
                         converted_input,
                         input_as_table,
                         converted_target,
                         target_as_table)
    return Layer.convert_output(grad)
def optimize(self):
    """
    Run the optimization held by the wrapped Java object.

    :return: the result of Layer.of applied to the Java model returned by
             the optimization
    """
    trained_jmodel = callJavaFunc(self.value.optimize)
    # Imported at call time rather than at module level, as in the original code.
    from bigdl.nn.layer import Layer
    return Layer.of(trained_jmodel)
def optimize(self):
    """
    Run the optimization held by the wrapped Java object, using the current
    Spark context for the Java call.

    :return: the result of Layer.of applied to the Java model returned by
             the optimization
    """
    spark_context = get_spark_context()
    trained_jmodel = callJavaFunc(spark_context, self.value.optimize)
    # Imported at call time rather than at module level, as in the original code.
    from bigdl.nn.layer import Layer
    return Layer.of(trained_jmodel)
def forward(self, input, target):
    """
    NB: It's for debug only, please use optimizer.optimize() in production.
    Compute the loss of the criterion for `input` compared with `target`.

    :param input: ndarray or list of ndarray
    :param target: ndarray or list of ndarray
    :return: value of loss
    """
    # check_input yields the converted value plus a flag telling the backend
    # whether the value is a table.
    converted_input, input_as_table = Layer.check_input(input)
    converted_target, target_as_table = Layer.check_input(target)
    # Unlike backward(), the backend result is returned without conversion.
    return callBigDlFunc(self.bigdl_type,
                         "criterionForward",
                         self.value,
                         converted_input,
                         input_as_table,
                         converted_target,
                         target_as_table)
def distributed_predict(self, inputs, sc):
    """
    Do prediction on a distributed collection of inputs.

    :param inputs: a collection supporting map() and first()
                   (presumably a Spark RDD - confirm against callers).
    :param sc: forwarded unchanged to the backend call
               (presumably the SparkContext - confirm against callers).
    :return: the backend result with every element converted by
             KerasNet.convert_output.
    """
    # Only the first element is inspected: the whole collection is treated as
    # table input when that element is a Python list.
    first_type_name = inputs.map(lambda item: item.__class__.__name__).first()
    input_is_table = first_type_name == "list"

    converted_inputs = inputs.map(lambda item: Layer.check_input(item)[0])
    predictions = callZooFunc(self.bigdl_type,
                              "inferenceModelDistriPredict",
                              self.value,
                              sc,
                              converted_inputs,
                              input_is_table)
    return predictions.map(lambda item: KerasNet.convert_output(item))
def predict(self, inputs):
    """
    Do prediction on inputs.

    :param inputs: A numpy array or a list of numpy arrays or JTensor or a list of JTensors.
    :return: the backend output converted by KerasNet.convert_output.
    """
    converted_inputs, as_table = Layer.check_input(inputs)
    prediction = callZooFunc(self.bigdl_type,
                             "inferenceModelPredict",
                             self.value,
                             converted_inputs,
                             as_table)
    return KerasNet.convert_output(prediction)
def optimize(self, end_trigger=None, batch_size=32):
    """
    Train through the backend "trainTFNet" call and write the returned
    variable values back into the session.

    :param end_trigger: when training should stop. Defaults to MaxEpoch(1)
                        when None is given.
    :param batch_size: forwarded unchanged to the backend call.
    """
    if end_trigger is None:
        end_trigger = MaxEpoch(1)

    # Every record becomes a Sample paired with a constant [0.0] label array.
    samples = self.dataset.rdd.map(
        lambda record: Sample.from_ndarray(record, [np.array([0.0])]))

    raw_variables = callBigDlFunc("float", "trainTFNet",
                                  self.export_dir, self.optim_method,
                                  samples, batch_size, end_trigger)
    trained_values = Layer.convert_output(raw_variables)

    # Pair each placeholder with its trained value and run the assign op.
    placeholder_feed = dict(zip(self.variable_placeholders, trained_values))
    self.sess.run(self.assign, feed_dict=placeholder_feed)
def predict(self, x, batch_per_thread=4, distributed=True):
    """
    Use a model to do prediction.

    # Arguments
    x: Prediction data. A Numpy array or RDD of Sample or ImageSet or TextSet.
    batch_per_thread: The default value is 4.
                      When distributed is True, the total batch size is
                      batch_per_thread * rdd.getNumPartitions.
                      When distributed is False, the total batch size is
                      batch_per_thread * numOfCores.
    distributed: Boolean. Whether to do prediction in distributed mode or local mode.
                 Default is True. In local mode, x must be a Numpy array or a list.

    # Returns
    An ImageSet/TextSet matching the input set type; the mapped backend result
    in distributed mode; a list of converted outputs in local mode.

    # Raises
    TypeError: if x is not one of the types accepted by the chosen mode.
    """
    # Set-typed input is wrapped back into the same kind of set it came from.
    if isinstance(x, ImageSet):
        result_wrapper = ImageSet
    elif isinstance(x, TextSet):
        result_wrapper = TextSet
    else:
        result_wrapper = None
    if result_wrapper is not None:
        predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  x,
                                  batch_per_thread)
        return result_wrapper(predicted)

    if not distributed:
        if not isinstance(x, (np.ndarray, list)):
            raise TypeError("Unsupported prediction data type: %s" % type(x))
        predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                                  self.value,
                                  self._to_jtensors(x),
                                  batch_per_thread)
        return [Layer.convert_output(item) for item in predicted]

    # Distributed mode: arrays are wrapped into a sample RDD with zero labels.
    if isinstance(x, np.ndarray):
        samples = to_sample_rdd(x, np.zeros([x.shape[0]]))
    elif isinstance(x, RDD):
        samples = x
    else:
        raise TypeError("Unsupported prediction data type: %s" % type(x))
    predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                              self.value,
                              samples,
                              batch_per_thread)
    return predicted.map(lambda item: Layer.convert_output(item))
def _create_model(self, java_model):
    """
    Build the Python-side NNClassifierModel for a fitted Java model.

    :param java_model: the Java model; its getModel() and getBatchSize()
                       are read here.
    :return: the classifier model configured with this estimator's sample
             preprocessing, feature column and prediction column, and with
             the Java model's batch size.
    """
    # Explicitly reset the sample preprocessing even though java_model already
    # carries it, so that the Python NNClassifierModel has it as well.
    estimator_preprocessing = self.getSamplePreprocessing()
    inner_model = Layer.from_jvalue(java_model.getModel(), bigdl_type=self.bigdl_type)

    bare_classifier = NNClassifierModel(model=inner_model,
                                        feature_preprocessing=None,
                                        jvalue=java_model,
                                        bigdl_type=self.bigdl_type)
    chained = ChainedPreprocessing([ToTuple(), estimator_preprocessing])
    classifier = bare_classifier.setSamplePreprocessing(chained)

    # Each setter is called on the return value of the previous one.
    configured = classifier.setFeaturesCol(self.getFeaturesCol())
    configured = configured.setPredictionCol(self.getPredictionCol())
    configured.setBatchSize(java_model.getBatchSize())
    return classifier
def init_from_existing_model(path, weight_path=None, input_seq_len=-1.0,
                             hidden_drop=-1.0, attn_drop=-1.0,
                             output_all_block=True, bigdl_type="float"):
    """
    Load an existing BERT model (with weights).

    # Arguments
    path: The path for the pre-defined model.
          Local file system, HDFS and Amazon S3 are supported.
          HDFS path should be like 'hdfs://[host]:[port]/xxx'.
          Amazon S3 path should be like 's3a://bucket/xxx'.
    weight_path: The path for pre-trained weights if any. Default is None.
    input_seq_len, hidden_drop, attn_drop, output_all_block: Forwarded unchanged
          to the backend "loadBERT" call.
          NOTE(review): the -1.0 defaults presumably mean "keep the value stored
          in the loaded model" - confirm against the backend.
    bigdl_type: Numeric type name passed to the backend. Default is "float".

    # Returns
    The loaded layer, re-typed as BERT.
    """
    loaded_jlayer = callBigDlFunc(bigdl_type, "loadBERT", path, weight_path,
                                  input_seq_len, hidden_drop, attn_drop,
                                  output_all_block)
    bert = Layer(jvalue=loaded_jlayer, bigdl_type=bigdl_type)
    # Swap the class in place so the generic Layer wrapper behaves as a BERT.
    bert.__class__ = BERT
    return bert
def layers(self):
    """
    Return the sub-modules reported by the backend "getSubModules" call,
    each wrapped with Layer.of.

    :return: list of layers
    """
    sub_modules = callBigDlFunc(self.bigdl_type, "getSubModules", self)
    return [Layer.of(sub_module) for sub_module in sub_modules]
def flattened_layers(self, include_container=False):
    """
    Return the sub-modules reported by the backend "getFlattenSubModules"
    call, each wrapped with Layer.of.

    :param include_container: forwarded unchanged to the backend call.
    :return: list of layers
    """
    sub_modules = callBigDlFunc(self.bigdl_type, "getFlattenSubModules",
                                self, include_container)
    return [Layer.of(sub_module) for sub_module in sub_modules]
def _do_load(jmodel, bigdl_type="float"):
    """
    Wrap an already-loaded Java model in a Python Layer.

    :param jmodel: the Java model to wrap.
    :param bigdl_type: numeric type name passed to the Layer constructor.
    :return: a Layer whose value attribute is jmodel.
    """
    wrapped = Layer(jvalue=jmodel, bigdl_type=bigdl_type)
    # Set value explicitly after construction, as the original code does.
    wrapped.value = jmodel
    return wrapped