Example #1
    def normalize(self):
        """
        Do normalization on the tokens.
        Tokenization must be done first.
        See Normalizer for more details.

        :return: TextSet after normalization.
        """
        jvalue = callZooFunc(self.bigdl_type, "textSetNormalize", self.value)
        return TextSet(jvalue=jvalue)
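
A minimal usage sketch for context; the import path and file path below are assumptions, not from the original:

    from bigdl.dllib.feature.text import TextSet  # import path assumed
    text_set = TextSet.read(path="/path/to/texts")  # placeholder path
    text_set = text_set.tokenize()  # tokenization must come first
    text_set = text_set.normalize()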
Example #2
    def predict(self, inputs):
        """
        Do prediction on inputs.

        :param inputs: A numpy array, a list of numpy arrays, a JTensor, or a list of JTensors.
        """
        jinputs, input_is_table = Layer.check_input(inputs)
        output = callZooFunc(self.bigdl_type, "inferenceModelPredict",
                             self.value, jinputs, input_is_table)
        return KerasNet.convert_output(output)
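
A hedged usage sketch; judging by the callZooFunc name, the enclosing class is an InferenceModel, and `model` below is assumed to be an instance that has already loaded weights:

    import numpy as np
    # `model` is a placeholder for a loaded InferenceModel instance
    dummy_input = np.random.rand(1, 3, 224, 224).astype("float32")
    preds = model.predict(dummy_input)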
Example #3
    def distributed_predict(self, inputs, sc):
        """
        Do prediction on an RDD of inputs in a distributed fashion.

        :param inputs: An RDD of inputs; each element can be a single input or a list of inputs.
        :param sc: An instance of SparkContext.
        """
        # Inspect the first element to decide whether each input is a table (list) of tensors.
        data_type = inputs.map(lambda x: x.__class__.__name__).first()
        input_is_table = False
        if data_type == "list":
            input_is_table = True
        jinputs = inputs.map(lambda x: Layer.check_input(x)[0])

        output = callZooFunc(self.bigdl_type, "inferenceModelDistriPredict",
                             self.value, sc, jinputs, input_is_table)
        return output.map(lambda x: KerasNet.convert_output(x))
Example #4
    def set_word_index(self, vocab):
        """
        Assign a word_index dictionary for this TextSet to use during word2idx.
        If you are loading the word_index from a saved file, it is recommended to use
        `load_word_index` directly.

        :param vocab: Dictionary of {word: index} to be used for word2idx.
        :return: TextSet with the word_index set.
        """
        jvalue = callZooFunc(self.bigdl_type, "textSetSetWordIndex", self.value, vocab)
        return TextSet(jvalue=jvalue)
Example #5
    def generate_sample(self):
        """
        Generate BigDL Samples.
        word2idx must be done first.
        See TextFeatureToSample for more details.

        :return: TextSet with Samples.
        """
        jvalue = callZooFunc(self.bigdl_type, "textSetGenerateSample", self.value)
        return TextSet(jvalue=jvalue)
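
For context, a sketch of the preprocessing chain that typically precedes generate_sample, using only methods shown elsewhere in this section (the chaining and the length 200 are illustrative assumptions):

    text_set = text_set.tokenize().normalize().word2idx().shape_sequence(len=200)
    text_set = text_set.generate_sample()  # features are now BigDL Samples, ready for training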
Example #6
    def load_bigdl(model_path, weight_path=None, bigdl_type="float"):
        """
        Load a pre-trained BigDL model.

        :param model_path: The path to the pre-trained model.
        :param weight_path: The path to the weights of the pre-trained model. Default is None.
        :return: A pre-trained model.
        """
        jmodel = callZooFunc(bigdl_type, "netLoadBigDL", model_path,
                             weight_path)
        return GraphNet.from_jvalue(jmodel)
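
Hypothetical usage; the paths are placeholders, and it is assumed this staticmethod is exposed on a Net-like class:

    net = Net.load_bigdl("/path/to/model.bigdl", weight_path="/path/to/model.bin")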
Example #7
 def get_predict(self, key="predict"):
     """
     Get the prediction list from a local ImageSet.
     """
     predicts = callZooFunc(self.bigdl_type, "localImageSetToPredict",
                            self.value, key)
     return list(
         map(
             lambda predict:
             (predict[0], list(map(lambda x: x.to_ndarray(), predict[1])))
             if predict[1] else (predict[0], None), predicts))
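
A sketch of consuming the result; `predicted` is assumed to be a local ImageSet on which a model's predict has already been run:

    for uri, arrays in predicted.get_predict():
        if arrays is not None:
            print(uri, [a.shape for a in arrays])  # each element is an ndarray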
Example #8
 def tf_dataset(cls, func, total_size, bigdl_type="float"):
     """
     :param func: a function return a tensorflow dataset
     :param total_size: total size of this dataset
     :param bigdl_type: numeric type
     :return: A feature set
     """
     func = CloudPickleSerializer.dumps(CloudPickleSerializer, func)
     jvalue = callZooFunc(bigdl_type, "createFeatureSetFromTfDataset", func,
                          total_size)
     return cls(jvalue=jvalue)
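
A hedged sketch with a synthetic dataset; the FeatureSet import path is an assumption:

    import numpy as np
    import tensorflow as tf
    from bigdl.dllib.feature.common import FeatureSet  # import path assumed

    def make_dataset():
        # small synthetic dataset for illustration
        x = np.random.rand(100, 4).astype("float32")
        y = np.random.randint(0, 2, size=(100,)).astype("float32")
        return tf.data.Dataset.from_tensor_slices((x, y))

    feature_set = FeatureSet.tf_dataset(make_dataset, total_size=100)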
Example #9
    def to_local(self):
        """
        Convert to a LocalTextSet.

        :return: LocalTextSet
        """
        if self.is_local():
            jvalue = self.value
        else:
            jvalue = callZooFunc(self.bigdl_type, "textSetToLocal", self.value)
        return LocalTextSet(jvalue=jvalue)
Example #10
 def __call__(self, x):
     """
     Some other modules point to current module
     :param x: input variables. x is either a Variable or list of Variable.
     :return: Variable containing current module
     """
     from bigdl.dllib.keras.autograd import Variable
     return Variable.from_jvalue(callZooFunc(self.bigdl_type,
                                             "connectInputs",
                                             self,
                                             to_list(x)))
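
A minimal functional-API sketch; the Dense import path and the Variable constructor signature are assumptions:

    from bigdl.dllib.keras.autograd import Variable
    from bigdl.dllib.keras.layers import Dense  # import path assumed
    x = Variable(input_shape=(10,))  # constructor signature assumed
    y = Dense(2)(x)  # __call__ connects the Dense layer to the input Variable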
Example #11
 def train_imagefeature(self, train_set, criterion, end_trigger=None, checkpoint_trigger=None,
                        validation_set=None, validation_method=None, batch_size=32):
     """
     Train model with provided imageFeature trainSet and criterion.
     The training will end when the endTrigger is triggered.
     During the training, if checkPointTrigger is defined and triggered,
     the model will be saved to modelDir. If validationSet and validationMethod
     are defined, the model will be evaluated at each checkpoint.
     :param train_set: training FeatureSet, a FeatureSet[ImageFeature]
     :param criterion: Loss function
     :param end_trigger: When to finish the training
     :param checkpoint_trigger: When to save a checkpoint and evaluate model.
     :param validation_set: Validation FeatureSet, a FeatureSet[Sample[T]]
     :param validation_method: Validation Methods.
     :param batch_size: Batch size
     :return: self
     """
     callZooFunc(self.bigdl_type, "estimatorTrainImageFeature", self.value, train_set,
                 criterion, end_trigger, checkpoint_trigger, validation_set,
                 validation_method, batch_size)
     return self
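
A hedged call sketch; `estimator` and `train_fset` are assumed to be pre-built placeholders, and the trigger/criterion import paths follow BigDL's layout as an assumption:

    from bigdl.dllib.optim.optimizer import MaxEpoch, EveryEpoch  # import path assumed
    from bigdl.dllib.nn.criterion import ClassNLLCriterion  # import path assumed
    estimator.train_imagefeature(train_set=train_fset,
                                 criterion=ClassNLLCriterion(),
                                 end_trigger=MaxEpoch(10),
                                 checkpoint_trigger=EveryEpoch(),
                                 batch_size=64)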
Example #12
    def read_parquet(path, sc, bigdl_type="float"):
        """
        Read relations from a parquet file.
        The schema should be the following:
        "id1"(string), "id2"(string) and "label"(int).

        :param path: The path to the parquet file.
        :param sc: An instance of SparkContext.
        :return: RDD of Relation.
        """
        jvalue = callZooFunc(bigdl_type, "readRelationsParquet", path, sc)
        return jvalue.map(lambda x: Relation(str(x[0]), str(x[1]), int(x[2])))
Example #13
    def load_tensorflow(self,
                        model_path,
                        model_type="frozenModel",
                        intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1,
                        use_per_session_threads=True):
        """
        Load a pre-trained TensorFlow model.

        :param model_path: String. The file path to the TensorFlow model.
        :param model_type: String. The type of the TensorFlow model file. Default is "frozenModel".
        :param intra_op_parallelism_threads: Int. The number of intraOpParallelismThreads.
                                             Default is 1.
        :param inter_op_parallelism_threads: Int. The number of interOpParallelismThreads.
                                             Default is 1.
        :param use_per_session_threads: Boolean. Whether to use perSessionThreads. Default is True.
        """
        callZooFunc(self.bigdl_type, "inferenceModelLoadTensorFlow",
                    self.value, model_path, model_type,
                    intra_op_parallelism_threads, inter_op_parallelism_threads,
                    use_per_session_threads)
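
A hedged sketch of loading a frozen graph; the InferenceModel import path and the model path are assumptions:

    from bigdl.orca.inference import InferenceModel  # import path varies by version; assumed
    model = InferenceModel()
    model.load_tensorflow("/path/to/frozen_inference_graph.pb",  # placeholder path
                          model_type="frozenModel",
                          intra_op_parallelism_threads=4,
                          inter_op_parallelism_threads=4)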
Example #14
    def generate_word_index_map(self, remove_topN=0, max_words_num=-1,
                                min_freq=1, existing_map=None):
        """
        Generate word_index map based on sorted word frequencies in descending order.
        Return the result dictionary, which can also be retrieved by 'get_word_index()'.
        Make sure you call this after tokenize. Otherwise you will get an error.
        See word2idx for more details.

        :return: Dictionary {word: id}
        """
        return callZooFunc(self.bigdl_type, "textSetGenerateWordIndexMap", self.value,
                           remove_topN, max_words_num, min_freq, existing_map)
Example #15
 def forward(self, input):
     """
     NB: It's for debug only, please use optimizer.optimize() in production.
     Takes an input object and computes the corresponding output of the module.

     :param input: ndarray or list of ndarray or JTensor or list of JTensor.
     :return: ndarray or list of ndarray
     """
     jinput, input_is_table = self.check_input(input)
     output = callZooFunc(self.bigdl_type, "zooForward", self.value, jinput,
                          input_is_table)
     return self.convert_output(output)
Example #16
def sum(x, axis=0, keepDims=False):
    """
    Sum of the values in a variable, along the specified axis.
    :param x: A variable.
    :param axis: An integer. The axis to compute the sum over.
    :param keepDims: A boolean, whether to keep the dimensions or not.
            If `keepDims` is `False`, the rank of the variable is reduced
            by 1 for each entry in `axis`. If `keepDims` is `True`,
            the reduced dimensions are retained with length 1.
    :return: A variable with sum of `x`.
    """
    return Variable.from_jvalue(callZooFunc("float", "sum", x, axis, keepDims))
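
A small sketch of calling sum on a Variable; the Variable constructor signature is assumed, and `sum` above is assumed to live in the autograd module:

    import bigdl.dllib.keras.autograd as A  # module location assumed
    from bigdl.dllib.keras.autograd import Variable
    v = Variable(input_shape=(3, 4))
    s = A.sum(v, axis=1, keepDims=True)  # the reduced dimension is kept with length 1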
Example #17
    def shape_sequence(self, len, trunc_mode="pre", pad_element=0):
        """
        Shape the sequence of indices to a fixed length.
        word2idx must be done first.
        See SequenceShaper for more details.

        :param len: Positive int. The target length to which each sequence will be shaped.
        :param trunc_mode: Truncation mode ("pre" or "post") if a sequence is longer than len.
                           Default is "pre".
        :param pad_element: Int. The element used to pad a sequence shorter than len. Default is 0.
        :return: TextSet after sequence shaping.
        """
        assert isinstance(pad_element, int), "pad_element should be an int"
        jvalue = callZooFunc(self.bigdl_type, "textSetShapeSequence", self.value,
                             len, trunc_mode, pad_element)
        return TextSet(jvalue=jvalue)
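
A usage sketch, assuming `text_set` has already been tokenized and run through word2idx:

    text_set = text_set.shape_sequence(len=100, trunc_mode="pre", pad_element=0)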
Example #18
    def load(model_path, weight_path=None, bigdl_type="float"):
        """
        Load an existing Analytics Zoo model defined in Keras-style (with weights).

        :param model_path: The path to load the saved model.
                          Local file system, HDFS and Amazon S3 are supported.
                          HDFS path should be like 'hdfs://[host]:[port]/xxx'.
                          Amazon S3 path should be like 's3a://bucket/xxx'.
        :param weight_path: The path for pre-trained weights if any. Default is None.
        :return: An Analytics Zoo model.
        """
        jmodel = callZooFunc(bigdl_type, "netLoad", model_path, weight_path)
        return Net.from_jvalue(jmodel, bigdl_type)
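
Hypothetical usage with an HDFS path, mirroring the docstring's path convention; host, port and paths are placeholders:

    model = Net.load("hdfs://localhost:9000/models/my_model",
                     weight_path="hdfs://localhost:9000/models/my_model.bin")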
Example #19
    def read_parquet(cls, path, sc, bigdl_type="float"):
        """
        Read texts with ids from a parquet file.
        The schema should be the following:
        "id"(string) and "text"(string).

        :param path: The path to the parquet file.
        :param sc: An instance of SparkContext.

        :return: DistributedTextSet.
        """
        jvalue = callZooFunc(bigdl_type, "textSetReadParquet", path, sc)
        return DistributedTextSet(jvalue=jvalue)
Example #20
 def __call__(self, x=None):
     """
      Connect the current module to upstream module nodes.

      :param x: upstream module nodes. x is either a Node or a list of Nodes.
      :return: Node containing the current module
     """
     x = to_list(x if x else [])
     layer = self
     if isinstance(self, Lambda):
         input_shapes = [var.get_output_shape() for var in x]
         layer = self.create(remove_batch(input_shapes))
     return Variable.from_jvalue(
         callZooFunc(self.bigdl_type, "connectInputs", layer, to_list(x)))
Example #21
    def pytorch_dataloader(cls,
                           dataloader,
                           features="_data[0]",
                           labels="_data[1]",
                           bigdl_type="float"):
        """
        Create a FeatureSet from a pytorch dataloader.

        :param dataloader: a pytorch dataloader, or a function that returns a pytorch dataloader.
        :param features: features in _data, where _data is fetched from the dataloader.
        :param labels: labels in _data, where _data is fetched from the dataloader.
        :param bigdl_type: numeric type
        :return: A feature set
        """
        import torch
        if isinstance(dataloader, torch.utils.data.DataLoader):
            node_num, core_num = get_node_and_core_number()
            if dataloader.batch_size % node_num != 0:
                true_bs = math.ceil(
                    dataloader.batch_size / node_num) * node_num
                warning_msg = "Detect dataloader's batch_size is not divisible by node number(" + \
                              str(node_num) + "), will adjust batch_size to " + str(true_bs) + \
                              " automatically"
                warnings.warn(warning_msg)

            bys = CloudPickleSerializer.dumps(CloudPickleSerializer,
                                              dataloader)
            jvalue = callZooFunc(bigdl_type, "createFeatureSetFromPyTorch",
                                 bys, False, features, labels)
            return cls(jvalue=jvalue)
        elif callable(dataloader):
            bys = CloudPickleSerializer.dumps(CloudPickleSerializer,
                                              dataloader)
            jvalue = callZooFunc(bigdl_type, "createFeatureSetFromPyTorch",
                                 bys, True, features, labels)
            return cls(jvalue=jvalue)
        else:
            raise ValueError(
                "Unsupported dataloader type, please pass pytorch dataloader" +
                " or a function to create pytorch dataloader.")
Example #22
    def from_saved_model(model_path, tag=None, signature=None,
                         inputs=None, outputs=None, tf_session_config=None, init_op=None):
        """
        Create a TFNet from a TensorFlow SavedModel.

        :param model_path: the path to the SavedModel directory
        :param tag: the tag to load in the saved model, default to "serve"
        :param signature: The signature of the SignatureDef that defines inputs
                          and outputs of the graph. TFNet assumes inputs is sorted
                          by their corresponding key in SignatureDef.
        :param inputs: a list of input tensor names of this model. You may want to use TensorFlow's
                      command line tool to inspect the saved model to find the input tensor
                      names, e.g. `saved_model_cli show --dir {saved_model_path} --all`
        :param outputs: a list of output tensor names of this model. You may want to use TensorFlow's
                      command line tool to inspect the saved model to find the output tensor
                      names, e.g. `saved_model_cli show --dir {saved_model_path} --all`
        :param tf_session_config: an optional tf.ConfigProto object to
                       set the session config on the Java side.
                       This config does not have to be the same as that of your current session.
                       E.g. sess_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                                         intra_op_parallelism_threads=1)
                            net = TFNet.from_session(sess, inputs, outputs, sess_config)
        :return: a TFNet
        """
        config_bytes = None
        if tf_session_config is not None:
            import tensorflow as tf
            assert isinstance(tf_session_config, tf.ConfigProto)
            tf_session_config.use_per_session_threads = True
            config_bytes = bytearray(tf_session_config.SerializeToString())

        if inputs is None or outputs is None:
            jvalue = callZooFunc("float", "createTFNetFromSavedModel",
                                 model_path, tag, signature, config_bytes)
        else:

            jvalue = callZooFunc("float", "createTFNetFromSavedModel",
                                 model_path, tag, inputs, outputs, config_bytes, init_op)
        return TFNet(path=None, jvalue=jvalue)
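
A hedged usage sketch; the model directory and tensor names are placeholders you would obtain from saved_model_cli:

    net = TFNet.from_saved_model("/path/to/saved_model",  # placeholder directory
                                 inputs=["input:0"],      # placeholder tensor names
                                 outputs=["output:0"])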
Example #23
    def predict(self, x, batch_per_thread=4, distributed=True):
        """
        Use a model to do prediction.

        # Arguments
        x: Prediction data. A Numpy array or RDD of Sample or ImageSet.
        batch_per_thread:
          The default value is 4.
          When distributed is True, the total batch size is batch_per_thread * rdd.getNumPartitions.
          When distributed is False, the total batch size is batch_per_thread * numOfCores.
        distributed: Boolean. Whether to do prediction in distributed mode or local mode.
                     Default is True. In local mode, x must be a Numpy array.
        """
        if isinstance(x, ImageSet) or isinstance(x, TextSet):
            results = callZooFunc(self.bigdl_type, "zooPredict", self.value, x,
                                  batch_per_thread)
            return ImageSet(results) if isinstance(
                x, ImageSet) else TextSet(results)
        if distributed:
            if isinstance(x, np.ndarray):
                data_rdd = to_sample_rdd(x, np.zeros([x.shape[0]]))
            elif isinstance(x, RDD):
                data_rdd = x
            else:
                raise TypeError("Unsupported prediction data type: %s" %
                                type(x))
            results = callZooFunc(self.bigdl_type, "zooPredict", self.value,
                                  data_rdd, batch_per_thread)
            return results.map(lambda result: Layer.convert_output(result))
        else:
            if isinstance(x, np.ndarray) or isinstance(x, list):
                results = callZooFunc(self.bigdl_type,
                                      "zooPredict", self.value,
                                      self._to_jtensors(x), batch_per_thread)
                return [Layer.convert_output(result) for result in results]
            else:
                raise TypeError("Unsupported prediction data type: %s" %
                                type(x))
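
A sketch of both modes; `model` is assumed to be an already-built Keras-style model whose input shape matches the dummy data, and distributed mode requires a running SparkContext:

    import numpy as np
    x = np.random.rand(8, 10).astype("float32")
    local_preds = model.predict(x, distributed=False)  # list of ndarrays
    rdd_preds = model.predict(x, distributed=True)     # RDD of predictions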
Example #24
    def get_predicts(self):
        """
        Get the prediction results (if any) combined with uris (if any) of a TextSet.
        If a text doesn't have a uri, its corresponding uri will be None.
        If a text hasn't been predicted by a model, its corresponding prediction will be None.

        :return: List of (uri, prediction as a list of numpy array) for LocalTextSet.
                 RDD of (uri, prediction as a list of numpy array) for DistributedTextSet.
        """
        predicts = callZooFunc(self.bigdl_type, "textSetGetPredicts", self.value)
        if isinstance(predicts, RDD):
            return predicts.map(lambda predict: (predict[0], _process_predict_result(predict[1])))
        else:
            return [(predict[0], _process_predict_result(predict[1])) for predict in predicts]
Example #25
    def get_train_summary(self, tag=None):
        """
        Get the scalar from the model's train summary.
        Return a 2-D array-like object which can be converted
        by np.array().

        # Arguments
        tag: The string variable representing the scalar wanted
        """
        # exception handle
        if tag != "Loss" and tag != "LearningRate" and tag != "Throughput":
            raise TypeError('Only "Loss", "LearningRate", "Throughput" ' +
                            'are supported in train summary')

        return callZooFunc(self.bigdl_type, "zooGetScalarFromSummary",
                           self.value, tag, "Train")
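
Hypothetical usage; it assumes a train summary was attached to the model before training (e.g. via a set_train_summary-style call, an assumption), and that each row is laid out as (step, value, timestamp):

    import numpy as np
    loss = np.array(model.get_train_summary("Loss"))
    print(loss[:5])  # first few (step, value, timestamp) rows; layout assumed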
Example #26
    def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"):
        """
        Create an ImageSet from RDDs of ndarrays.

        :param image_rdd: an RDD of ndarrays; each ndarray should have 3 or 4 dimensions (3D images)
        :param label_rdd: an RDD of ndarrays
        :return: a DistributedImageSet
        """
        image_rdd = image_rdd.map(lambda x: JTensor.from_ndarray(x))
        if label_rdd is not None:
            label_rdd = label_rdd.map(lambda x: JTensor.from_ndarray(x))
        return ImageSet(jvalue=callZooFunc(bigdl_type,
                                           "createDistributedImageSet",
                                           image_rdd, label_rdd),
                        bigdl_type=bigdl_type)
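
A sketch assuming a running SparkContext `sc` and CHW float images; shapes and label format are illustrative:

    import numpy as np
    image_rdd = sc.parallelize([np.random.rand(3, 224, 224) for _ in range(4)])
    label_rdd = sc.parallelize([np.array([1.0]) for _ in range(4)])
    image_set = ImageSet.from_rdds(image_rdd, label_rdd)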
Example #27
    def word2idx(self, remove_topN=0, max_words_num=-1, min_freq=1, existing_map=None):
        """
        Map word tokens to indices.
        Important: Take care that this method behaves a bit differently for training and inference.

        ---------------------------------------Training--------------------------------------------
        During training, you need to generate a new word_index dictionary according to the texts
        you are dealing with. Thus this method will first do the dictionary generation and then
        convert words to indices based on the generated dictionary.

        You can specify the following arguments, which pose some constraints when generating
        the dictionary.
        In the resulting dictionary, indices start from 1 and follow the occurrence
        frequency of each word, sorted in descending order.
        Here we adopt the convention that index 0 will be reserved for unknown words.
        After word2idx, you can get the generated word_index dictionary by calling 'get_word_index'.
        Also, you can call `save_word_index` to save this word_index dictionary to be used in
        future training.

        :param remove_topN: Non-negative int. Remove the topN words with highest frequencies
                            in the case where those are treated as stopwords.
                            Default is 0, namely remove nothing.
        :param max_words_num: Int. The maximum number of words to be taken into consideration.
                              Default is -1, namely all words will be considered.
                              Otherwise, it should be a positive int.
        :param min_freq: Positive int. Only those words with frequency >= min_freq will be taken
                         into consideration.
                         Default is 1, namely all words that occur will be considered.
        :param existing_map: Existing dictionary of word_index if any.
                             Default is None and in this case a new dictionary with index starting
                             from 1 will be generated.
                             If not None, then the generated dictionary will preserve the word_index
                             in existing_map and assign subsequent indices to new words.

        ---------------------------------------Inference--------------------------------------------
        During inference, you are supposed to use exactly the same word_index dictionary as in
        the training stage instead of generating a new one.
        Thus you do not need to specify any of the above arguments.
        You need to call `load_word_index` or `set_word_index` beforehand for dictionary loading.

        Tokenization must be done first.
        See WordIndexer for more details.

        :return: TextSet after word2idx.
        """
        jvalue = callZooFunc(self.bigdl_type, "textSetWord2idx", self.value,
                             remove_topN, max_words_num, min_freq, existing_map)
        return TextSet(jvalue=jvalue)
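
A hedged end-to-end sketch of the two stages; the save_word_index/load_word_index usage and chaining are inferred from the docstring above, and the index file path is a placeholder:

    # training: generate a fresh dictionary under the given constraints, then persist it
    train_set = train_set.tokenize().word2idx(remove_topN=1, max_words_num=5000)
    train_set.save_word_index("/tmp/word_index.txt")

    # inference: reuse the saved dictionary instead of generating a new one
    test_set = test_set.tokenize().load_word_index("/tmp/word_index.txt").word2idx()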
Example #28
    def to_distributed(self, sc=None, partition_num=4):
        """
        Convert to a DistributedTextSet.

        Need to specify SparkContext to convert a LocalTextSet to a DistributedTextSet.
        In this case, you may also want to specify partition_num, the default of which is 4.

        :return: DistributedTextSet
        """
        if self.is_distributed():
            jvalue = self.value
        else:
            assert sc, "sc cannot be null to transform a LocalTextSet to a DistributedTextSet"
            jvalue = callZooFunc(self.bigdl_type, "textSetToDistributed", self.value,
                                 sc, partition_num)
        return DistributedTextSet(jvalue=jvalue)
Example #29
    def backward(self, y_true, y_pred):
        """
        NB: It's for debug only, please use optimizer.optimize() in production.
        Performs a back-propagation step through the criterion, with respect to the given input.

        :param y_true: ndarray or list of ndarray
        :param y_pred: ndarray or list of ndarray
        :return: ndarray
        """
        input = y_pred
        target = y_true
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callZooFunc(self.bigdl_type, "criterionBackward", self.value,
                             jinput, input_is_table, jtarget, target_is_table)
        return Layer.convert_output(output)
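
A debug-only sketch, consistent with the docstring's warning; `criterion` is a placeholder for a loss instance whose expected shapes match the dummy data:

    import numpy as np
    y_pred = np.random.rand(4, 3).astype("float32")
    y_true = np.random.rand(4, 3).astype("float32")
    grad = criterion.backward(y_true, y_pred)  # gradient of the loss w.r.t. y_pred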
Example #30
    def get_word_index(embedding_file, bigdl_type="float"):
        """
        Get the full wordIndex map from the given embedding_file.

        # Arguments
        embedding_file: The path to the embedding file.
                        Currently only the following GloVe files are supported:
                        "glove.6B.50d.txt", "glove.6B.100d.txt", "glove.6B.200d.txt"
                        "glove.6B.300d.txt", "glove.42B.300d.txt", "glove.840B.300d.txt".
                        You can download them from: https://nlp.stanford.edu/projects/glove/.

        # Return
        Dictionary of word (string) and its corresponding index (int) obtained from
        the given embedding file.
        """
        return callZooFunc(bigdl_type, "wordEmbeddingGetWordIndex",
                           embedding_file)
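
Hypothetical usage; the GloVe file path is a placeholder and the enclosing class is assumed to be WordEmbedding:

    word_index = WordEmbedding.get_word_index("/path/to/glove.6B.100d.txt")
    print(len(word_index))        # vocabulary size
    print(word_index.get("the"))  # index of a common word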