def __create_optimizer(self, x=None, y=None, batch_size=32, nb_epoch=10,
                       validation_data=None, is_distributed=False):
    """Build the optimizer used for training.

    :param x: ndarray of features (converted to an RDD[Sample] in
        distributed mode), or an RDD[Sample] directly.
    :param y: ndarray of labels; ignored when x is already an RDD.
    :param batch_size: mini-batch size for the optimizer.
    :param nb_epoch: number of training epochs.
    :param validation_data: (x_val, y_val) ndarray tuple, an RDD[Sample],
        or None to skip validation.
    :param is_distributed: True to create a distributed (Spark) optimizer,
        False for a local one.
    :return: the created optimizer.
    :raise Exception: if x has an unsupported type.
    """
    if is_distributed:
        if isinstance(x, np.ndarray):
            input = to_sample_rdd(x, y)
            # Only convert validation ndarrays when they were actually
            # provided; None is already a legal value downstream (the RDD
            # branch below passes validation_data through unchanged).
            if validation_data is not None:
                validation_data_rdd = to_sample_rdd(*validation_data)
            else:
                validation_data_rdd = None
        elif isinstance(x, RDD):
            input = x
            validation_data_rdd = validation_data
        else:
            # Previously an unsupported type fell through and hit a
            # NameError on the unbound `input`; fail explicitly instead.
            raise Exception("not supported type: %s" % x)
        return self.__create_distributed_optimizer(
            training_rdd=input,
            batch_size=batch_size,
            nb_epoch=nb_epoch,
            validation_data=validation_data_rdd)
    else:
        if isinstance(x, np.ndarray):
            return self.__create_local_optimizer(
                x, y,
                batch_size=batch_size,
                nb_epoch=nb_epoch,
                validation_data=validation_data)
        raise Exception("not supported type: %s" % x)
def evaluate(self, x, y, batch_size=32, sample_weight=None, is_distributed=False):
    """
    Evaluate a model by the given metrics.
    :param x: ndarray or list of ndarray for local mode.
              RDD[Sample] for distributed mode
    :param y: ndarray or list of ndarray for local mode and would be None for cluster mode.
    :param batch_size: mini-batch size used during evaluation.
    :param sample_weight: not supported; reported via unsupport_exp if set.
    :param is_distributed: run in local mode or distributed mode. NB: if is_distributed=true, x
    should be RDD[Sample] and y should be None
    :return: list of metric results.
    :raise Exception: on local mode, unsupported input type, or missing metrics.
    """
    if sample_weight:
        unsupport_exp("sample_weight")
    if not is_distributed:
        raise Exception("We only support evaluation in distributed mode")
    if isinstance(x, np.ndarray):
        input = to_sample_rdd(x, y)
    elif isinstance(x, RDD):
        input = x
    else:
        # Previously an unsupported type left `input` unbound and the call
        # below raised a confusing NameError; fail explicitly instead.
        raise Exception("not supported type: %s" % x)
    if not self.metrics:
        raise Exception("No Metrics found.")
    # NOTE: the original fetched a SparkContext here but never used it;
    # bmodel.evaluate drives the job itself, so the call was dropped.
    return [r.result
            for r in self.bmodel.evaluate(input, batch_size, self.metrics)]
def predict(self, x, batch_pre_core=-1, distributed=True):
    """
    Use a model to do prediction.

    :param x: an ImageSet, a Numpy array / list of Numpy arrays (local mode),
        or an RDD[Sample] (distributed mode).
    :param batch_pre_core: batch size handled per core; -1 keeps the default.
    :param distributed: True to predict on Spark, False to predict locally.
    :return: an ImageSet, an RDD of converted outputs, or a list of
        converted outputs, depending on the input.
    :raise TypeError: when x is not a supported input type.
    """
    # ImageSet input is handled the same way regardless of `distributed`.
    if isinstance(x, ImageSet):
        predicted = callBigDlFunc(self.bigdl_type, "zooPredict",
                                  self.value, x, batch_pre_core)
        return ImageSet(predicted)

    if not distributed:
        # Local mode accepts a single ndarray or a list of ndarrays.
        if not isinstance(x, (np.ndarray, list)):
            raise TypeError("Unsupported prediction data type: %s" % type(x))
        outputs = callBigDlFunc(self.bigdl_type, "zooPredict",
                                self.value, self._to_jtensors(x),
                                batch_pre_core)
        return [Layer.convert_output(out) for out in outputs]

    # Distributed mode: wrap ndarrays into an RDD[Sample] with dummy labels.
    if isinstance(x, np.ndarray):
        sample_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]))
    elif isinstance(x, RDD):
        sample_rdd = x
    else:
        raise TypeError("Unsupported prediction data type: %s" % type(x))
    predicted_rdd = callBigDlFunc(self.bigdl_type, "zooPredict",
                                  self.value, sample_rdd, batch_pre_core)
    return predicted_rdd.map(lambda out: Layer.convert_output(out))
def test_lenet_distributed_rdd(self):
    """End-to-end check of the BigDL backend on an RDD[Sample]:
    fit, predict and evaluate a small LeNet on MNIST-shaped data."""
    kmodel, X_train, y_train = TestModels.kmodel_seq_lenet_mnist()
    # Presumably ensures a SparkContext exists before building the RDD —
    # kept for its side effect even though the handle itself is unused.
    sc = get_spark_context()
    # The unused `Sample` import was dropped; only to_sample_rdd is needed.
    from bigdl.util.common import to_sample_rdd
    training_rdd = to_sample_rdd(X_train, y_train)
    self.modelTest(X_train, kmodel, dump_weights=True)
    kmodel.compile(loss='categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
    model = with_bigdl_backend(kmodel)
    model.fit(training_rdd, batch_size=4, nb_epoch=2,
              validation_data=training_rdd, is_distributed=True)
    model.predict(X_train, is_distributed=True).collect()
    model.evaluate(X_train, y_train, is_distributed=True)
    print(model)
def predict(self, x, batch_size=None, verbose=None, is_distributed=False):
    """Generates output predictions for the input samples,
    processing the samples in a batched way.

    # Arguments
    x: the input data, as a Numpy array or list of Numpy array for local mode.
       as RDD[Sample] for distributed mode
    batch_size: not supported for now; must be None.
    verbose: not supported for now; must be None.
    is_distributed: used to control run in local or cluster. the default value is False
    # Returns
    A Numpy array or RDD[Sample] of predictions.
    # Raises
    Exception: if batch_size/verbose is set, or x has an unsupported type.
    """
    if batch_size or verbose:
        raise Exception("we don't support batch_size or verbose for now")
    if is_distributed:
        if isinstance(x, np.ndarray):
            # Labels are unused at inference time; zeros are supplied only
            # so the ndarray can be wrapped into Samples.
            input = to_sample_rdd(x, np.zeros([x.shape[0]]))
        elif isinstance(x, RDD):
            input = x
        else:
            # Previously an unsupported type fell through with `input`
            # unbound (NameError); raise the intended exception instead.
            raise Exception("not supported type: %s" % x)
        return self.bmodel.predict(input)
    if isinstance(x, np.ndarray):
        return self.bmodel.predict_local(x)
    raise Exception("not supported type: %s" % x)