Exemple #1
0
 def image_frame(cls,
                 image_frame,
                 memory_type="DRAM",
                 sequential_order=False,
                 shuffle=True,
                 bigdl_type="float"):
     """
     Build a FeatureSet out of an ImageFrame.

     :param image_frame: the ImageFrame to wrap.
     :param memory_type: string, DRAM, PMEM or an integer number.
                         DRAM caches the dataset in dynamic random-access memory.
                         PMEM caches the dataset in Intel Optane DC Persistent Memory.
                         An integer n caches the dataset on disk and keeps only 1/n
                           of the data in memory during training; once that 1/n has
                           been consumed, the current cache is released and the next
                           1/n is loaded.
     :param sequential_order: whether to iterate the elements of the feature set
                              in sequential order for training.
     :param shuffle: whether to shuffle the elements in each partition before each
                     epoch when training.
     :param bigdl_type: numeric type
     :return: A feature set, constructed as ``cls(jvalue=...)``
     """
     return cls(jvalue=callZooFunc(
         bigdl_type, "createFeatureSetFromImageFrame",
         image_frame, memory_type, sequential_order, shuffle))
 def from_pytorch(model):
     """
     Create a TorchModel directly from PyTorch model, e.g. model in torchvision.models.

     :param model: a PyTorch model, or a function/class that creates a PyTorch model
                   when called without arguments.
     :return: a TorchModel built from the serialized ``model`` and its flattened
              trainable weights.
     """
     import types
     # ``types.ClassType`` (old-style classes) only exists on Python 2. Looking it
     # up unconditionally raised AttributeError on Python 3 for every input that
     # is not a plain function, including ordinary model instances. ``type``
     # covers classes on Python 3 and new-style classes on Python 2.
     factory_types = (types.FunctionType, type,
                      getattr(types, "ClassType", type))
     # A factory is called once, only to read the initial trainable weights;
     # the object serialized below is still ``model`` itself, as before.
     module = model() if isinstance(model, factory_types) else model
     weights = [param.view(-1) for param in trainable_param(module)]
     flatten_weight = torch.nn.utils.parameters_to_vector(
         weights).data.numpy()
     bys = io.BytesIO()
     torch.save(model, bys, pickle_module=zoo_pickle_module)
     module_bytes = bys.getvalue()
     jvalue = callZooFunc("float", "createTorchModel", module_bytes,
                          JTensor.from_ndarray(flatten_weight))
     return TorchModel(jvalue, module_bytes)
Exemple #3
0
    def from_relation_lists(cls, relations, corpus1, corpus2,
                            bigdl_type="float"):
        """
        Generate a TextSet for ranking.

        The steps performed are:
        1. Group relations by id1: for each id1 in relations, collect the list of id2
        (with the corresponding label) that come together with it.
        2. Join with the corpus to transform each id to indexedTokens.
        Note: Make sure that the corpus has been transformed by SequenceShaper and WordIndexer.
        3. For each list, generate a TextFeature having Sample with:
        - feature of shape (list_length, text1_length + text2_length).
        - label of shape (list_length, 1).

        :param relations: List or RDD of Relation. Each Relation is converted with
                          ``to_tuple()`` before being handed to the backend.
        :param corpus1: TextSet that contains all id1 in relations. For each TextFeature in
                        corpus1, text must have been transformed to indexedTokens of the
                        same length.
        :param corpus2: TextSet that contains all id2 in relations. For each TextFeature in
                        corpus2, text must have been transformed to indexedTokens of the
                        same length.
        Note that if relations is a list, then corpus1 and corpus2 must both be LocalTextSet.
        If relations is RDD, then corpus1 and corpus2 must both be DistributedTextSet.
        :param bigdl_type: numeric type

        :return: TextSet.
        :raises TypeError: if relations is neither an RDD nor a list.
        """
        if not isinstance(relations, (RDD, list)):
            raise TypeError("relations should be RDD or list of Relation")

        if isinstance(relations, RDD):
            relation_tuples = relations.map(lambda rel: rel.to_tuple())
        else:
            relation_tuples = [rel.to_tuple() for rel in relations]

        return TextSet(jvalue=callZooFunc(
            bigdl_type, "textSetFromRelationLists",
            relation_tuples, corpus1, corpus2))
Exemple #4
0
    def read(cls, path, sc=None, min_partitions=1,
             resize_height=-1, resize_width=-1, image_codec=-1,
             with_label=False, one_based_label=True,
             bigdl_type="float"):
        """
        Read images as Image Set

        :param path: path to read images

        if sc is defined, path can be local or HDFS. Wildcard characters are supported.

        if with_label is set to true, path should be a directory that has two levels. The
        first level is class folders, and the second is images. All images belonging to the
        same class should be put into the same class folder, so each image in the path is
        labeled by the folder it belongs to.

        :param sc: SparkContext
        :param min_partitions: A suggestion value of the minimal splitting number for input data.
        :param resize_height: height after resize, by default is -1 which will not resize the image
        :param resize_width: width after resize, by default is -1 which will not resize the image
        :param image_codec: specifying the color type of a loaded image, same as in
                            OpenCV.imread. By default is
                            Imgcodecs.CV_LOAD_IMAGE_UNCHANGED(-1)
        :param with_label: whether to treat folders in the path as image classification
                           labels and read the labels into ImageSet.
        :param one_based_label: whether to use one based label
        :param bigdl_type: numeric type
        :return: ImageSet
        """
        jvalue = callZooFunc(
            bigdl_type, "readImageSet",
            path, sc, min_partitions,
            resize_height, resize_width, image_codec,
            with_label, one_based_label)
        return ImageSet(jvalue=jvalue)
    def init_from_existing_model(path, weight_path=None, input_seq_len=-1.0,
                                 hidden_drop=-1.0, attn_drop=-1.0,
                                 output_all_block=True, bigdl_type="float"):
        """
        Load an existing BERT model (with weights).

        # Arguments
        path: The path for the pre-defined model.
              Local file system, HDFS and Amazon S3 are supported.
              HDFS path should be like 'hdfs://[host]:[port]/xxx'.
              Amazon S3 path should be like 's3a://bucket/xxx'.
        weight_path: The path for pre-trained weights if any. Default is None.
        input_seq_len, hidden_drop, attn_drop, output_all_block:
              Forwarded unchanged to the backend "loadBERT" call; their meaning is
              defined by that call and is not visible from this method.
        bigdl_type: Numeric type, passed to the backend call and to the Layer wrapper.

        # Returns
        The loaded layer, as an object whose class is BERT.
        """
        bert = Layer(
            jvalue=callZooFunc(bigdl_type, "loadBERT", path, weight_path,
                               input_seq_len, hidden_drop, attn_drop,
                               output_all_block),
            bigdl_type=bigdl_type)
        # The wrapper is created as a generic Layer and then re-tagged as BERT.
        bert.__class__ = BERT
        return bert
Exemple #6
0
    def predict_classes(self, x, batch_per_thread=4, zero_based_label=True):
        """
        Use a model to predict for classes. By default, label predictions start from 0.

        # Arguments
        x: Prediction data. A Numpy array or RDD of Sample.
        batch_per_thread:
          The default value is 4.
          When distributed is True, the total batch size is batch_per_thread * rdd.getNumPartitions.
          When distributed is False the total batch size is batch_per_thread * numOfCores.
        zero_based_label: Boolean. Whether result labels start from 0.
                          Default is True. If False, result labels start from 1.

        # Raises
        TypeError: if x is neither a Numpy array nor an RDD.
        """
        if not isinstance(x, (np.ndarray, RDD)):
            raise TypeError("Unsupported prediction data type: %s" % type(x))

        if isinstance(x, np.ndarray):
            # Labels are not known at prediction time; an all-zero label vector
            # with one entry per row is supplied to build the samples.
            samples = to_sample_rdd(x, np.zeros([x.shape[0]]))
        else:
            samples = x

        return callZooFunc(self.bigdl_type, "zooPredictClasses",
                           self.value, samples,
                           batch_per_thread, zero_based_label)
 def save_checkpoint(self):
     """
     Invoke the backend "saveCheckpoint" function on the wrapped value.
     """
     callZooFunc(
         self.bigdl_type,
         "saveCheckpoint",
         self.value)
Exemple #8
0
 def __init__(self, bigdl_type="float", *args):
     """
     Create the backend object wrapped by this instance.

     :param bigdl_type: numeric type; stored on the instance and passed to the
                        backend call.
     :param args: positional arguments forwarded to the backend constructor.
     """
     self.bigdl_type = bigdl_type
     # The backend function name is derived from this instance by JavaValue.
     constructor_name = JavaValue.jvm_class_constructor(self)
     self.value = callZooFunc(bigdl_type, constructor_name, *args)
def get_optimizer_version(bigdl_type="float"):
    """
    Get DistriOptimizer version.

    :param bigdl_type: numeric type
    :return: optimizerVersion, as returned by the backend "getOptimizerVersion" call
    """
    optimizer_version = callZooFunc(bigdl_type, "getOptimizerVersion")
    return optimizer_version
Exemple #10
0
 def load(path):
     """
     Load an XGBRegressorModel from ``path``.

     :param path: location passed to the backend "loadXGBRegressorModel" call.
     :return: an XGBRegressorModel wrapping the loaded backend value.
     """
     return XGBRegressorModel(
         jvalue=callZooFunc("float", "loadXGBRegressorModel", path))
Exemple #11
0
 def transform(self, dataset):
     """
     Run the backend "transformXGBRegressorModel" function on ``dataset``.

     :param dataset: input handed to the backend together with the wrapped model.
     :return: whatever the backend call returns (named ``df`` in the original code).
     """
     return callZooFunc("float", "transformXGBRegressorModel",
                        self.value, dataset)
Exemple #12
0
 def setPredictionCol(self, prediction):
     """
     Set the prediction column on the wrapped XGBRegressorModel.

     :param prediction: value forwarded to the backend
                        "setPredictionXGBRegressorModel" call.
     """
     callZooFunc(
         "float", "setPredictionXGBRegressorModel",
         self.value, prediction)
Exemple #13
0
 def load(path):
     """
     Load an NNClassifierModel from ``path``.

     The result of the backend "loadNNClassifierModel" call is wrapped in an
     NNClassifierModel created without a Python-side model or feature
     preprocessing.

     :param path: location passed to the backend call.
     :return: NNClassifierModel
     """
     loaded = callZooFunc("float", "loadNNClassifierModel", path)
     return NNClassifierModel(
         model=None, feature_preprocessing=None, jvalue=loaded)
Exemple #14
0
    def __init__(self, model, criterion,
                 feature_preprocessing=None, label_preprocessing=None,
                 jvalue=None, bigdl_type="float"):
        """
        Construct a NNEstimator with BigDL model, criterion and Preprocessing for feature and label
        data.

        :param model: BigDL Model to be trained.
        :param criterion: BigDL criterion.
        :param feature_preprocessing: Converts the data in the feature column to a Tensor or
               to a Sample directly. It expects a List of Int as the size of the converted
               Tensor, or a Preprocessing[F, Tensor[T]].

               When a List of Int is given, only these feature column data types are handled:
               Float, Double, Int, Array[Float], Array[Double], Array[Int] and MLlib Vector.
               The feature data are converted to Tensors with the specified sizes before
               being sent to the model; internally a SeqToTensor is generated from the size
               and used as the feature_preprocessing. A list whose first element is itself a
               list is wrapped in a SeqToMultipleTensors instead.

               Alternatively, a Preprocessing[F, Tensor[T]] that transforms the feature data
               to a Tensor[T] may be given. Some pre-defined Preprocessing are provided in
               package zoo.feature, and several can be combined as a ChainedPreprocessing.

               The feature_preprocessing will also be copied to the generated NNModel and
               applied to the feature column during transform.

               A falsy value (the default None included) is replaced by ``SeqToTensor()``.
        :param label_preprocessing: similar to feature_preprocessing, but applies to Label
               data. A list given here must contain only ints.
        :param jvalue: Java object created by Py4j. When truthy it is used as-is instead of
               constructing a new backend object.
        :param bigdl_type: optional parameter. data type of model, "float"(default) or "double".
        """
        super(NNEstimator, self).__init__()

        # Defaults are created here, not in the signature, to avoid
        # initialization during import.
        feature_preprocessing = feature_preprocessing or SeqToTensor()
        label_preprocessing = label_preprocessing or SeqToTensor()

        # A plain list is shorthand for tensor size(s); the first element
        # decides between one tensor and several.
        if type(feature_preprocessing) is list:
            first_entry = feature_preprocessing[0]
            if type(first_entry) is list:
                feature_preprocessing = SeqToMultipleTensors(
                    feature_preprocessing)
            elif isinstance(first_entry, int):
                feature_preprocessing = SeqToTensor(feature_preprocessing)

        if type(label_preprocessing) is list:
            assert all(isinstance(size, int) for size in label_preprocessing)
            label_preprocessing = SeqToTensor(label_preprocessing)

        sample_preprocessing = FeatureLabelPreprocessing(
            feature_preprocessing, label_preprocessing)

        if jvalue:
            self.value = jvalue
        else:
            self.value = callZooFunc(bigdl_type,
                                     self.jvm_class_constructor(),
                                     model,
                                     criterion,
                                     sample_preprocessing)
        self.model = model
        self.samplePreprocessing = sample_preprocessing
        self.bigdl_type = bigdl_type
        self._java_obj = self.value

        # Param descriptors declared on this estimator.
        self.maxEpoch = Param(self, "maxEpoch", "number of max Epoch")
        self.learningRate = Param(self, "learningRate", "learning rate")
        self.learningRateDecay = Param(
            self, "learningRateDecay", "learning rate decay")
        self.cachingSample = Param(self, "cachingSample", "cachingSample")

        # Optional training configuration; all start out unset.
        self.train_summary = None
        self.validation_config = None
        self.checkpoint_config = None
        self.validation_summary = None
        self.endWhen = None
        self.dataCacheLevel = "DRAM"
Exemple #15
0
def write_parquet(df, path, mode):
    """
    Hand ``df``, ``path`` and ``mode`` to the backend "dfWriteParquet" function.

    :param df: the data frame to write.
    :param path: destination passed to the backend.
    :param mode: write mode passed to the backend.
    """
    callZooFunc(
        "float",
        "dfWriteParquet",
        df, path, mode)
Exemple #16
0
 def fit(self, df):
     """
     Run the backend "fitXGBRegressor" function on ``df``.

     :param df: training data handed to the backend with the wrapped value.
     :return: the result of the backend call.
     """
     fitted = callZooFunc("float", "fitXGBRegressor", self.value, df)
     return fitted
Exemple #17
0
 def setFeaturesCol(self, features):
     """
     Set the features column on the wrapped XGBRegressorModel.

     :param features: value forwarded to the backend
                      "setFeaturesXGBRegressorModel" call.
     """
     callZooFunc(
         "float", "setFeaturesXGBRegressorModel",
         self.value, features)
Exemple #18
0
 def setFeaturesCol(self, features):
     """
     Set the features column on the wrapped XGBClassifierModel.

     :param features: value forwarded to the backend
                      "setFeaturesXGBClassifierModel" call.
     """
     callZooFunc(
         "float", "setFeaturesXGBClassifierModel",
         self.value, features)
Exemple #19
0
 def setInferBatchSize(self, value: int):
     """
     Set the inference batch size on the wrapped XGBRegressorModel.

     :param value: batch size forwarded to the backend
                   "setInferBatchSizeXGBRegressorModel" call.
     """
     callZooFunc(
         "float", "setInferBatchSizeXGBRegressorModel",
         self.value, value)
Exemple #20
0
 def setPredictionCol(self, prediction):
     """
     Set the prediction column on the wrapped XGBClassifierModel.

     :param prediction: value forwarded to the backend
                        "setPredictionXGBClassifierModel" call.
     """
     callZooFunc(
         "float", "setPredictionXGBClassifierModel",
         self.value, prediction)
Exemple #21
0
 def save(self, path):
     """
     Save the wrapped XGBRegressorModel to ``path``.

     Prints a progress message, then calls the backend "saveXGBRegressorModel"
     function.

     :param path: destination passed to the backend.
     """
     print("start saving in python side")
     callZooFunc(
         "float", "saveXGBRegressorModel",
         self.value, path)
Exemple #22
0
 def transform(self, dataset):
     """
     Run the backend "transformXGBClassifierModel" function on ``dataset``.

     :param dataset: input handed to the backend together with the wrapped model.
     :return: whatever the backend call returns (named ``df`` in the original code).
     """
     return callZooFunc("float", "transformXGBClassifierModel",
                        self.value, dataset)
Exemple #23
0
def set_optimizer_version(optimizerVersion, bigdl_type="float"):
    """
    Set DistriOptimizer version.

    :param optimizerVersion: should be "OptimizerV1" or "OptimizerV2".
    :param bigdl_type: numeric type
    """
    callZooFunc(
        bigdl_type,
        "setOptimizerVersion",
        optimizerVersion)
Exemple #24
0
 def __init__(self):
     """
     Create an XGBRegressor backed by the value returned from the backend
     "getXGBRegressor" call.
     """
     super(XGBRegressor, self).__init__()
     # Numeric type used for the backend call; always "float" here.
     bigdl_type = "float"
     self.value = callZooFunc(bigdl_type, "getXGBRegressor")
Exemple #25
0
 def __init__(self, module_bytes, weights, bigdl_type="float"):
     """
     Create the backend object from serialized module bytes and weights.

     :param module_bytes: serialized module; stored on the instance and passed
                          to the backend constructor.
     :param weights: ndarray of weights, converted with ``JTensor.from_ndarray``
                     before being passed to the backend.
     :param bigdl_type: numeric type
     """
     weight_tensor = JTensor.from_ndarray(weights)
     self.module_bytes = module_bytes
     constructor_name = self.jvm_class_constructor()
     self.value = callZooFunc(
         bigdl_type, constructor_name, module_bytes, weight_tensor)
     self.bigdl_type = bigdl_type
Exemple #26
0
 def setNthread(self, value: int):
     """
     Set the thread count on the wrapped XGBRegressor.

     :param value: forwarded to the backend "setXGBRegressorNthread" call.
     """
     callZooFunc(
         "float", "setXGBRegressorNthread",
         self.value, value)
 def load_checkpoint(self, path):
     """
     Load a checkpoint into the wrapped value, then refresh the Python side.

     :param path: checkpoint location passed to the backend
                  "loadZooCheckpoint" call.
     """
     callZooFunc(
         self.bigdl_type, "loadZooCheckpoint",
         self.value, path)
     # Runs after the backend load so the Python-side weights are re-read.
     self.get_weights_to_python()
Exemple #28
0
 def setNumRound(self, value: int):
     """
     Set the number of rounds on the wrapped XGBRegressor.

     :param value: forwarded to the backend "setXGBRegressorNumRound" call.
     """
     callZooFunc(
         "float", "setXGBRegressorNumRound",
         self.value, value)
 def standardScale(df):
     """
     Run the backend "standardScaleDF" function on ``df``.

     :param df: the data frame handed to the backend.
     :return: the result of the backend call.
     """
     scaled = callZooFunc("float", "standardScaleDF", df)
     return scaled
Exemple #30
0
 def setNumWorkers(self, value: int):
     """
     Set the number of workers on the wrapped XGBRegressor.

     :param value: forwarded to the backend "setXGBRegressorNumWorkers" call.
     """
     callZooFunc(
         "float", "setXGBRegressorNumWorkers",
         self.value, value)