Example #1
 def set_running_mean(self, running_mean):
     """
     Set the running mean of the BatchNormalization layer.
     :param running_mean: a Numpy array.
     """
     callBigDlFunc(self.bigdl_type, "setRunningMean",
                   self.value, JTensor.from_ndarray(running_mean))
     return self
Example #2
File: optimizer.py Project: ru003ar/BigDL
    def set_gradclip_l2norm(self, clip_norm):
        """
        Configure L2 norm clipping settings.


        :param clip_norm: gradient L2-Norm threshold
        """
        callBigDlFunc(self.bigdl_type, "setL2NormClip", self.value, clip_norm)
Example #3
 def set_running_std(self, running_std):
     """
     Set the running variance of the BatchNormalization layer.
     :param running_std: a Numpy array.
     """
     callBigDlFunc(self.bigdl_type, "setRunningStd",
                   self.value, JTensor.from_ndarray(running_std))
     return self
Example #4
File: optimizer.py Project: ru003ar/BigDL
    def set_criterion(self, criterion):
        """
        Set a new criterion, for optimizer reuse.

        :param criterion: new criterion
        :return:
        """
        callBigDlFunc(self.bigdl_type, "setCriterion", self.value,
                      criterion)
Example #5
File: optimizer.py Project: ru003ar/BigDL
    def set_gradclip_const(self, min_value, max_value):
        """
        Configure constant clipping settings.


        :param min_value: the minimum value to clip by
        :param max_value: the maximum value to clip by
        """
        callBigDlFunc(self.bigdl_type, "setConstantClip", self.value, min_value, max_value)
Example #6
File: optimizer.py Project: ru003ar/BigDL
    def set_traindata(self, training_rdd, batch_size):
        """
        Set a new training dataset, for optimizer reuse.

        :param training_rdd: the training dataset
        :param batch_size: training batch size
        :return:
        """
        callBigDlFunc(self.bigdl_type, "setTrainData", self.value,
                      training_rdd, batch_size)
Example #7
File: optimizer.py Project: ru003ar/BigDL
    def set_train_summary(self, summary):
        """
        Set train summary. A TrainSummary object contains information
        necessary for the optimizer to know how often the logs are recorded,
        where to store the logs and how to retrieve them, etc. For details,
        refer to the docs of TrainSummary.


        :param summary: a TrainSummary object
        """
        callBigDlFunc(self.bigdl_type, "setTrainSummary", self.value,
                      summary)
        return self
Example #8
    def save_graph_topology(self, log_path, backward=False):
        """
        Save the current model graph to a folder, which can be displayed in TensorBoard
        by running the command:
        tensorboard --logdir log_path

        # Arguments
        log_path: The path to save the model graph.
        backward: Whether to also save the backward graph. Default is False.
        """
        callBigDlFunc(self.bigdl_type, "zooSaveGraphTopology",
                      self.value,
                      log_path,
                      backward)
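The saved topology is meant to be inspected with TensorBoard, as the docstring notes. A short sketch, assuming `model` is an Analytics Zoo model exposing this method:

# Hypothetical sketch: `model` is assumed to expose save_graph_topology.
model.save_graph_topology("/tmp/model_graph")
# Then, from a shell:
#   tensorboard --logdir /tmp/model_graph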
Example #9
File: optimizer.py Project: ru003ar/BigDL
    def set_checkpoint(self, checkpoint_trigger,
                       checkpoint_path, isOverWrite=True):
        """
        Configure checkpoint settings.


        :param checkpoint_trigger: the interval to write snapshots
        :param checkpoint_path: the path to write snapshots into
        :param isOverWrite: whether to overwrite existing snapshots in path. Default is True.
        """
        if not os.path.exists(checkpoint_path):
            mkpath(checkpoint_path)
        callBigDlFunc(self.bigdl_type, "setCheckPoint", self.value,
                      checkpoint_trigger, checkpoint_path, isOverWrite)
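A brief sketch of the checkpoint setter, assuming `optimizer` is a BigDL Optimizer and using the EveryEpoch trigger from bigdl.optim.optimizer:

from bigdl.optim.optimizer import EveryEpoch

# Write a snapshot at the end of every epoch, overwriting older snapshots.
optimizer.set_checkpoint(checkpoint_trigger=EveryEpoch(),
                         checkpoint_path="/tmp/bigdl_checkpoints",
                         isOverWrite=True)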
Example #10
File: optimizer.py Project: ru003ar/BigDL
    def set_validation(self, batch_size, X_val, Y_val, trigger, val_method=None):
        """
        Configure validation settings.

        :param batch_size: validation batch size
        :param X_val: features of validation dataset
        :param Y_val: label of validation dataset
        :param trigger: validation interval
        :param val_method: the ValidationMethod to use, e.g. "Top1Accuracy", "Top5Accuracy", "Loss"
        """
        if val_method is None:
            val_method = [Top1Accuracy()]
        callBigDlFunc(self.bigdl_type, "setValidation", self.value, batch_size,
                      trigger, [JTensor.from_ndarray(X) for X in to_list(X_val)],
                      JTensor.from_ndarray(Y_val), to_list(val_method))
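A usage sketch for the ndarray-based overload above, assuming `optimizer` is a BigDL Optimizer and using the trigger and metric classes from bigdl.optim.optimizer; the shapes are placeholders:

import numpy as np
from bigdl.optim.optimizer import EveryEpoch, Top1Accuracy

X_val = np.random.rand(100, 28, 28)    # hypothetical validation features
Y_val = np.random.randint(1, 11, 100)  # hypothetical validation labels

optimizer.set_validation(batch_size=32, X_val=X_val, Y_val=Y_val,
                         trigger=EveryEpoch(),
                         val_method=[Top1Accuracy()])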
Example #11
 def transform(self, dataset):
     """
     Apply the transformer to the images in "inputCol" and store the transformed result
     into "outputCols"
     """
     self._transfer_params_to_java()
     return callBigDlFunc(self.bigdl_type, "dlImageTransform", self.value, dataset)
Example #12
def abs(x):
    """
    Element-wise absolute value.
    :param x: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "abs", x))
Example #13
def softplus(x):
    """
    Softplus of a variable.
    :param x: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "softplus", x))
Example #14
def neg(x):
    """
    Computes numerical negative value element-wise.
    :param x: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "neg", x))
Example #15
def exp(x):
    """
    Element-wise exponential.
    :param x: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "exp", x))
Example #16
def sqrt(x):
    """
    Element-wise square root.
    :param x: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "sqrt", x))
Example #17
def log(x):
    """
    Element-wise log.
    :param x: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "log", x))
Example #18
File: image.py Project: ru003ar/BigDL
 def get_image(self, float_key="floats", to_chw=True):
     """
     Get the image RDD from the ImageFrame.
     """
     tensor_rdd = callBigDlFunc(self.bigdl_type,
                                "distributedImageFrameToImageTensorRdd",
                                self.value, float_key, to_chw)
     return tensor_rdd.map(lambda tensor: tensor.to_ndarray())
Example #19
 def __call__(self, image_set, bigdl_type="float"):
     """
     Transform the ImageSet.
     """
     jset = callBigDlFunc(bigdl_type,
                          "transformImageSet", self.value, image_set)
     return ImageSet(jvalue=jset)
Example #20
File: image.py Project: ru003ar/BigDL
 def transform(self, transformer, bigdl_type="float"):
     """
     Transform the ImageFrame with the given transformer.
     """
     self.value = callBigDlFunc(bigdl_type,
                                "transformImageFrame", transformer, self.value)
     return self
Example #21
File: image.py Project: ru003ar/BigDL
 def __call__(self, image_frame, bigdl_type="float"):
     """
     Transform the ImageFrame.
     """
     jframe = callBigDlFunc(bigdl_type,
                            "transformImageFrame", self.value, image_frame)
     return ImageFrame(jvalue=jframe)
Example #22
File: image.py Project: ru003ar/BigDL
 def get_image(self, float_key="floats", to_chw=True):
     """
     Get the image list from the ImageFrame.
     """
     tensors = callBigDlFunc(self.bigdl_type,
                             "localImageFrameToImageTensor",
                             self.value, float_key, to_chw)
     return map(lambda tensor: tensor.to_ndarray(), tensors)
Example #23
File: image.py Project: ru003ar/BigDL
 def get_image(self, float_key="floats", to_chw=True):
     """
     Get the image as an ndarray from the ImageFeature.
     """
     tensor = callBigDlFunc(self.bigdl_type, "imageFeatureToImageTensor", self.value,
                            float_key, to_chw)
     return tensor.to_ndarray()
Example #24
def maximum(x, y):
    """
    Element-wise maximum of two variables.
    :param x: A variable.
    :param y: A variable.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "maximum", x, y))
Example #25
File: optimizer.py Project: ru003ar/BigDL
    def set_validation(self, batch_size, val_rdd, trigger, val_method=None):
        """
        Configure validation settings.


        :param batch_size: validation batch size
        :param val_rdd: validation dataset
        :param trigger: validation interval
        :param val_method: the ValidationMethod to use, e.g. "Top1Accuracy", "Top5Accuracy", "Loss"
        """
        if val_method is None:
            val_method = [Top1Accuracy()]
        func_name = "setValidation"
        if isinstance(val_rdd, DataSet):
            func_name = "setValidationFromDataSet"
        callBigDlFunc(self.bigdl_type, func_name, self.value, batch_size,
                      trigger, val_rdd, to_list(val_method))
Example #26
def pow(x, a):
    """
    Element-wise exponentiation.
    :param x: A variable.
    :param a: a Python number (the exponent).
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "pow", x, float(a)))
Example #27
def l2_normalize(x, axis):
    """
    Normalizes a tensor with respect to the L2 norm along the specified axis.
    :param x: A variable. Shape should only be [batch, xx]
    :param axis: axis along which to perform normalization.
    :return: A variable.
    """
    return Variable.from_jvalue(callBigDlFunc("float", "l2Normalize", x, int(axis)))
Example #28
File: optimizer.py Project: ru003ar/BigDL
 def __init__(self, jvalue, bigdl_type, *args):
     if jvalue:
         assert type(jvalue) == JavaObject
         self.value = jvalue
     else:
         self.value = callBigDlFunc(
             bigdl_type, JavaValue.jvm_class_constructor(self), *args)
     self.bigdl_type = bigdl_type
Example #29
File: optimizer.py Project: ru003ar/BigDL
    def add(self, scheduler, max_iteration, bigdl_type="float"):
        """
        Add a learning rate scheduler to the contained `schedules`

        :param scheduler: the learning rate scheduler to be added
        :param max_iteration: the number of iterations this scheduler will run
        """
        return callBigDlFunc(bigdl_type, "addScheduler", self.value, scheduler, max_iteration)
Example #30
File: optimizer.py Project: ru003ar/BigDL
 def save(self, path, overWrite):
     """
     Save the OptimMethod.
     :param path: the path to save to
     :param overWrite: whether to overwrite an existing file
     """
     method = self.value
     return callBigDlFunc(self.bigdl_type, "saveOptimMethod", method, path, overWrite)
Example #31
    def load_model(path, weight_path=None, bigdl_type="float"):
        """
        Load an existing TextClassifier model (with weights).

        # Arguments
        path: The path for the pre-defined model.
              Local file system, HDFS and Amazon S3 are supported.
              HDFS path should be like 'hdfs://[host]:[port]/xxx'.
              Amazon S3 path should be like 's3a://bucket/xxx'.
        weight_path: The path for pre-trained weights if any. Default is None.
        """
        jmodel = callBigDlFunc(bigdl_type, "loadTextClassifier", path,
                               weight_path)
        model = ZooModel._do_load(jmodel, bigdl_type)
        model.__class__ = TextClassifier
        return model
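A loading sketch for the model above; the `zoo.models.textclassification` import path and the file locations are assumptions:

# Hypothetical sketch: import path and model path are placeholders.
from zoo.models.textclassification import TextClassifier

model = TextClassifier.load_model("hdfs://host:9000/models/text_classifier.model")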
Example #32
    def forward(self, input, target):
        """
        NB: It's for debug only, please use optimizer.optimize() in production.
        Takes an input object, and computes the corresponding loss of the criterion,
        compared with `target`

        :param input: ndarray or list of ndarray
        :param target: ndarray or list of ndarray
        :return: value of loss
        """
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callBigDlFunc(self.bigdl_type, "criterionForward", self.value,
                               jinput, input_is_table, jtarget,
                               target_is_table)
        return output
Example #33
 def files_to_image_frame(cls,
                          url,
                          sc,
                          class_num,
                          partition_num=-1,
                          bigdl_type="float"):
     """
     Extract Hadoop sequence files from an HDFS path as an ImageFrame.
     :param url: sequence files folder path
     :param sc: spark context
     :param class_num: class number of data
     :param partition_num: partition number, default: Engine.nodeNumber() * Engine.coreNumber()
     """
     jvalue = callBigDlFunc(bigdl_type, "seqFilesToImageFrame", url, sc,
                            class_num, partition_num)
     return ImageFrame(jvalue=jvalue)
Example #34
    def to_distributed(self, sc=None, partition_num=4):
        """
        Convert to a DistributedTextSet.

        Need to specify SparkContext to convert a LocalTextSet to a DistributedTextSet.
        In this case, you may also want to specify partition_num, the default of which is 4.

        :return: DistributedTextSet
        """
        if self.is_distributed():
            jvalue = self.value
        else:
            assert sc, "sc cannot be null to transform a LocalTextSet to a DistributedTextSet"
            jvalue = callBigDlFunc(self.bigdl_type, "textSetToDistributed", self.value,
                                   sc, partition_num)
        return DistributedTextSet(jvalue=jvalue)
Example #35
 def __call__(self, x=None):
     """
     Make upstream module nodes point to the current module.
     :param x: upstream module nodes. x is either a Node or list of Node.
     :return: node containing current module
     """
     x = to_list(x if x else [])
     layer = self
     if isinstance(self, Lambda):
         input_shapes = [
             ZooKerasLayer.of(node.element().value).get_output_shape()
             for node in x
         ]
         layer = self.create(remove_batch(input_shapes))
     return Node.of(
         callBigDlFunc(self.bigdl_type, "createNode", layer, to_list(x)))
Example #36
    def backward(self, y_true, y_pred):
        """
        NB: It's for debug only, please use optimizer.optimize() in production.
        Performs a back-propagation step through the criterion, with respect to the given input.

        :param y_true: ndarray or list of ndarray, the target
        :param y_pred: ndarray or list of ndarray, the prediction
        :return: ndarray
        """
        input = y_pred
        target = y_true
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callBigDlFunc(self.bigdl_type, "criterionBackward",
                               self.value, jinput, input_is_table, jtarget,
                               target_is_table)
        return Layer.convert_output(output)
Example #37
    def word2idx(self, remove_topN=0, max_words_num=-1):
        """
        Map word tokens to indices.
        Indices start from 1 and correspond to the occurrence frequency of each word,
        sorted in descending order.
        See WordIndexer for more details.

        :param remove_topN: Int. Remove the topN words with highest frequencies in the case
                            where those are treated as stopwords.
                            Default is 0, namely remove nothing.
        :param max_words_num: Int. The maximum number of words to be taken into consideration.
                              Default is -1, namely all words will be considered.
        :return: TextSet after word2idx.
        """
        jvalue = callBigDlFunc(self.bigdl_type, "textSetWord2idx", self.value,
                               remove_topN, max_words_num)
        return TextSet(jvalue=jvalue)
Example #38
 def set_summary_trigger(self, name, trigger):
     """
     Set the interval of recording for each indicator.

     :param name: tag name. Supported tag names are "LearningRate", "Loss",
        "Throughput" and "Parameters". "Parameters" is an umbrella tag that
        includes weight, bias, gradWeight, gradBias, and some running status
        (e.g. runningMean and runningVar in BatchNormalization). If you
        don't set any triggers, Loss and Throughput are by default recorded
        in each iteration, while LearningRate and Parameters are *NOT*
        recorded: recording parameters may introduce substantial overhead
        when the model is very big, and LearningRate is not a public
        attribute of every OptimMethod.
     :param trigger: trigger
     :param trigger: trigger
     """
     return callBigDlFunc(self.bigdl_type, "summarySetTrigger", self.value,
                          name, trigger)
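Combined with `set_train_summary` from Example #7, a typical logging setup might look like the following sketch, assuming BigDL's TrainSummary and SeveralIteration classes and an existing `optimizer`:

from bigdl.optim.optimizer import TrainSummary, SeveralIteration

summary = TrainSummary(log_dir="/tmp/bigdl_summaries", app_name="lenet")
# Record the heavyweight "Parameters" tag only every 20 iterations.
summary.set_summary_trigger("Parameters", SeveralIteration(20))
optimizer.set_train_summary(summary)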
Example #39
 def index_select(self, dim, index):
     """
        Select an index of the input in the given dim and return the subset part.
        The batch dimension needs to be unchanged.
        The selected dim will be removed after this operation.
        For example, if input is:
        1 2 3
        4 5 6
        Select(1, 1) will give output [2 5]
        Select(1, -1) will give output [3 6]
     :param dim: The dimension to select. 0-based index. Cannot select the batch dimension.
             -1 means the last dimension of the input.
     :param index: The index along the selected dimension. 0-based index.
            -1 means the last index of the given dimension.
     :return:
     """
     return Variable.from_jvalue(callBigDlFunc("float", "indexSelect", self, dim, index))
Example #40
    def get_word_index(embedding_file, bigdl_type="float"):
        """
        Get the full wordIndex map from the given embedding_file.

        # Arguments
        embedding_file: The path to the embedding file.
                        Currently only the following GloVe files are supported:
                        "glove.6B.50d.txt", "glove.6B.100d.txt", "glove.6B.200d.txt"
                        "glove.6B.300d.txt", "glove.42B.300d.txt", "glove.840B.300d.txt".
                        You can download them from: https://nlp.stanford.edu/projects/glove/.

        # Returns
        Dictionary of word (string) and its corresponding index (int) obtained from
        the given embedding file.
        """
        return callBigDlFunc(bigdl_type, "wordEmbeddingGetWordIndex",
                             embedding_file)
Example #41
    def load_word_index(self, path):
        """
        Load the word_index map which was saved after the training, so that this TextSet can
        directly use this word_index during inference.
        Each separate line should be "word id".

        Note that after calling `load_word_index`, you do not need to specify any argument when
        calling `word2idx` in the preprocessing pipeline as now you are using exactly the loaded
        word_index for transformation.

        For LocalTextSet, load txt from a local file system.
        For DistributedTextSet, load txt from a local or distributed file system (such as HDFS).

        :return: TextSet with the loaded word_index.
        """
        jvalue = callBigDlFunc(self.bigdl_type, "textSetLoadWordIndex",
                               self.value, path)
        return TextSet(jvalue=jvalue)
Example #42
File: image.py Project: wlu-mstr/BigDL
    def __init__(self,
                 image_list=None,
                 label_list=None,
                 jvalue=None,
                 bigdl_type="float"):
        assert jvalue or image_list, "jvalue and image_list cannot both be None at the same time"
        if jvalue:
            self.value = jvalue
        else:
            # init from image ndarray list and label rdd(optional)
            image_tensor_list = map(lambda image: JTensor.from_ndarray(image),
                                    image_list)
            label_tensor_list = map(lambda label: JTensor.from_ndarray(label),
                                    label_list) if label_list else None
            self.value = callBigDlFunc(bigdl_type,
                                       JavaValue.jvm_class_constructor(self),
                                       image_tensor_list, label_tensor_list)

        self.bigdl_type = bigdl_type
Example #43
def prepare_embedding(embedding_file,
                      word_index=None,
                      randomize_unknown=False,
                      normalize=False):
    """
    Prepare embedding weights from embedding_file given word_index.

    # Arguments
    embedding_file and word_index: See WordEmbedding.
    randomize_unknown: Boolean. Whether to randomly initialize words that don't exist in
                       embedding_file. Default is False and in this case corresponding entries
                       to unknown words will be zero vectors.
    normalize: Boolean. Whether to normalize word vectors. Default is False.

    # Return
    Pretrained embedding weights as a numpy array.
    """
    return callBigDlFunc("float", "prepareEmbedding", embedding_file,
                         word_index, randomize_unknown,
                         normalize).to_ndarray()
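`get_word_index` (Example #40) and `prepare_embedding` are naturally chained. A sketch; the GloVe file path is a placeholder and WordEmbedding as the enclosing class of `get_word_index` is an assumption:

# Hypothetical sketch: the GloVe path is a placeholder.
glove = "/tmp/glove.6B/glove.6B.100d.txt"

word_index = WordEmbedding.get_word_index(glove)  # dict of word -> index
weights = prepare_embedding(glove,
                            word_index=word_index,
                            randomize_unknown=True,  # random vectors for unknown words
                            normalize=False)
# `weights` is a numpy array of pretrained embedding weights.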
Example #44
 def load_tf(path,
             inputs,
             outputs,
             byte_order="little_endian",
             bin_file=None,
             bigdl_type="float"):
     """
     Load a pre-trained Tensorflow model.
     :param path: The path containing the pre-trained model.
     :param inputs: The input nodes of this graph
     :param outputs: The output nodes of this graph
     :param byte_order: byte_order of the file, `little_endian` or `big_endian`
     :param bin_file: the optional bin file
                     produced by bigdl dump_model util function to store the weights.
                     Default is None.
     :return: A pre-trained model.
     """
     jmodel = callBigDlFunc(bigdl_type, "netLoadTF", path, inputs, outputs,
                            byte_order, bin_file)
     return GraphNet.from_jvalue(jmodel, bigdl_type)
Example #45
    def unroll(cls, data_rdd, unroll_length, predict_step=1):
        """
        Unroll a rdd of arrays to prepare features and labels.

        # Arguments
        data_rdd: RDD[Array]. data to be unrolled, it holds original time series features
        unroll_length: Int. The length of previous values used to predict the future value.
        predict_step: Int. How many time steps to predict future value, default is 1.
        return: an RDD of FeatureLableIndex.
        A simple example:
                     data: (1,2,3,4,5,6); unrollLength: 2, predictStep: 1
                     features, label, index
                     (1,2), 3, 0
                     (2,3), 4, 1
                     (3,4), 5, 2
                     (4,5), 6, 3
        """
        result = callBigDlFunc("float", "unroll", data_rdd, unroll_length,
                               predict_step)
        return cls._to_indexed_rdd(result)
Example #46
 def readImages(path,
                sc=None,
                minPartitions=1,
                resizeH=-1,
                resizeW=-1,
                bigdl_type="float"):
     """
     Read the directory of images into DataFrame from the local or remote source.
     :param path Directory to the input data files; the path can be comma-separated paths as a
             list of inputs. Wildcard paths are supported similarly to sc.binaryFiles(path).
     :param minPartitions A suggested value of the minimal splitting number for input data.
     :param resizeH height after resize
     :param resizeW width after resize
     :return DataFrame with a single column "image"; Each record in the column represents
             one image record: Row (uri, height, width, channels, CvType, bytes).
     """
     df = callBigDlFunc(bigdl_type, "nnReadImage", path, sc, minPartitions,
                        resizeH, resizeW)
     df._sc._jsc = sc._jsc
     return df
Example #47
 def __init__(self, pre_processor=None,
              post_processor=None,
              batch_per_partition=4,
              label_map=None, feature_padding_param=None, jvalue=None, bigdl_type="float"):
     self.bigdl_type = bigdl_type
     if jvalue:
         self.value = jvalue
     else:
         if pre_processor:
             assert issubclass(pre_processor.__class__, Preprocessing), \
                 "the pre_processor should be subclass of Preprocessing"
         if post_processor:
             assert issubclass(post_processor.__class__, Preprocessing), \
                 "the pre_processor should be subclass of Preprocessing"
         self.value = callBigDlFunc(
             bigdl_type, JavaValue.jvm_class_constructor(self),
             pre_processor,
             post_processor,
             batch_per_partition,
             label_map,
             feature_padding_param)
Example #48
 def __init__(self, pre_processor=None,
              post_processor=None,
              batch_per_partition=4,
              label_map=None, feature_padding_param=None, jvalue=None, bigdl_type="float"):
      self.bigdl_type = bigdl_type
     if jvalue:
         self.value = jvalue
     else:
         if pre_processor:
             assert pre_processor.__class__.__bases__[0].__name__ == "FeatureTransformer",\
                 "the pre_processor should be subclass of FeatureTransformer"
         if post_processor:
             assert post_processor.__class__.__bases__[0].__name__ == "FeatureTransformer", \
                 "the pre_processor should be subclass of FeatureTransformer"
         self.value = callBigDlFunc(
             bigdl_type, JavaValue.jvm_class_constructor(self),
             pre_processor,
             post_processor,
             batch_per_partition,
             label_map,
             feature_padding_param)
Example #49
    def read_csv(cls, path, sc=None, min_partitions=1, bigdl_type="float"):
        """
        Read texts with id from csv file.
        Each record is supposed to contain the following two fields in order:
        id(string) and text(string).
        Note that the csv file should have no header.

        :param path: The path to the csv file. Local and distributed file systems (such as
                     HDFS) are supported. If you want to read from a distributed file system, sc
                     needs to be specified.
        :param sc: An instance of SparkContext.
                   If specified, texts will be read as a DistributedTextSet.
                   Default is None and in this case texts will be read as a LocalTextSet.
        :param min_partitions: Int. A suggestion value of the minimal partition number for input
                               texts. Only need to specify this when sc is not None. Default is 1.

        :return: TextSet.
        """
        jvalue = callBigDlFunc(bigdl_type, "textSetReadCSV", path, sc,
                               min_partitions)
        return TextSet(jvalue=jvalue)
Example #50
    def __init__(self,
                 model,
                 training_rdd,
                 criterion,
                 end_trigger,
                 batch_size,
                 optim_method=None,
                 bigdl_type="float"):
        """
        Create an optimizer.


        :param model: the neural net model
        :param training_rdd: the training dataset
        :param criterion: the loss function
        :param optim_method: the algorithm to use for optimization,
           e.g. SGD, Adagrad, etc. If optim_method is None, the default algorithm is SGD.
        :param end_trigger: when to end the optimization
        :param batch_size: training batch size
        """
        if not optim_method:
            optim_methods = {model.name(): SGD()}
        elif isinstance(optim_method, OptimMethod):
            optim_methods = {model.name(): optim_method}
        elif isinstance(optim_method, JavaObject):
            optim_methods = {
                model.name(): OptimMethod(optim_method, bigdl_type)
            }
        else:
            optim_methods = optim_method
        if isinstance(training_rdd, RDD):
            JavaValue.__init__(self, None, bigdl_type, model.value,
                               training_rdd, criterion, optim_methods,
                               end_trigger, batch_size)
        elif isinstance(training_rdd, DataSet):
            self.bigdl_type = bigdl_type
            self.value = callBigDlFunc(self.bigdl_type,
                                       "createDistriOptimizerFromDataSet",
                                       model.value, training_rdd, criterion,
                                       optim_methods, end_trigger, batch_size)
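Putting the constructor together with the setters shown earlier gives a typical end-to-end training setup. A sketch under the assumption that `model`, `criterion`, `train_rdd` and `val_rdd` already exist:

from bigdl.optim.optimizer import (Optimizer, SGD, MaxEpoch, EveryEpoch,
                                   Top1Accuracy)

# Hypothetical sketch: model, criterion and the two RDDs are assumed to exist.
optimizer = Optimizer(model=model,
                      training_rdd=train_rdd,
                      criterion=criterion,
                      end_trigger=MaxEpoch(10),
                      batch_size=128,
                      optim_method=SGD(learningrate=0.01))
optimizer.set_validation(batch_size=128, val_rdd=val_rdd,
                         trigger=EveryEpoch(), val_method=[Top1Accuracy()])
optimizer.set_checkpoint(EveryEpoch(), "/tmp/bigdl_checkpoints")
trained_model = optimizer.optimize()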
Example #51
 def slice(self, dim, start_index, length):
     """
     Same as narrow in Torch.
     Slice the input with the number of dimensions not being reduced.
     The batch dimension needs to be unchanged.
     For example, if input is:
     1 2 3
     4 5 6
     slice(1, 1, 2) will give output
     2 3
     5 6
     slice(1, 2, -1) will give output
     3
     6
     :param dim: The dimension to narrow. 0-based index. Cannot narrow the batch dimension.
             -1 means the last dimension of the input.
     :param start_index: Non-negative integer.
             The start index on the given dimension. 0-based index.
     :param length: The length to be sliced.
     """
     return Variable.from_jvalue(
         callBigDlFunc("float", "slice", self, dim, start_index, length))
Example #52
 def readImages(path,
                sc=None,
                minPartitions=1,
                resizeH=-1,
                resizeW=-1,
                image_codec=-1,
                bigdl_type="float"):
     """
     Read the directory of images into DataFrame from the local or remote source.
     :param path Directory to the input data files; the path can be comma-separated paths as a
             list of inputs. Wildcard paths are supported similarly to sc.binaryFiles(path).
     :param minPartitions A suggested value of the minimal splitting number for input data.
     :param resizeH height after resize, by default is -1 which will not resize the image
     :param resizeW width after resize, by default is -1 which will not resize the image
     :param image_codec specifying the color type of a loaded image, same as in OpenCV.imread.
            By default is Imgcodecs.CV_LOAD_IMAGE_UNCHANGED(-1)
     :return DataFrame with a single column "image"; Each record in the column represents
             one image record: Row (uri, height, width, channels, CvType, bytes).
     """
     df = callBigDlFunc(bigdl_type, "nnReadImage", path, sc, minPartitions,
                        resizeH, resizeW, image_codec)
     df._sc._jsc = sc._jsc
     return df
Example #53
def batch_dot(x, y, axes=1, normalize=False):
    """
    Operator that computes a dot product between samples in two tensors.

    E.g. if applied to two tensors `a` and `b` of shape `(batch_size, n)`,
    the output will be a tensor of shape `(batch_size, 1)`
    where each entry `i` will be the dot product between
    `a[i]` and `b[i]`.

    :param x: Shape should only be [batch, xx]
    :param y: Shape should only be [batch, xx]
    :param axes: Integer or tuple of integers,
                axis or axes along which to take the dot product.
    :param normalize: Whether to L2-normalize samples along the
                dot product axis before taking the dot product.
                If set to True, then the output of the dot product
                is the cosine proximity between the two samples.
    :return: A variable.
    """
    if not normalize:
        if isinstance(axes, int):
            axes = [axes] * 2
    return Variable.from_jvalue(callBigDlFunc("float", "batchDot", x, y, axes, normalize))
Example #54
    def from_relation_lists(cls,
                            relations,
                            corpus1,
                            corpus2,
                            bigdl_type="float"):
        """
        Used to generate a TextSet for ranking.

        This method does the following:
        1. For each id1 in relations, find the list of id2 with corresponding label that
        comes together with id1.
        In other words, group relations by id1.
        2. Join with corpus to transform each id to indexedTokens.
        Note: Make sure that the corpus has been transformed by SequenceShaper and WordIndexer.
        3. For each list, generate a TextFeature having Sample with:
        - feature of shape (list_length, text1_length + text2_length).
        - label of shape (list_length, 1).

        :param relations: List or RDD of Relation.
        :param corpus1: TextSet that contains all id1 in relations. For each TextFeature in corpus1,
                        text must have been transformed to indexedTokens of the same length.
        :param corpus2: TextSet that contains all id2 in relations. For each TextFeature in corpus2,
                        text must have been transformed to indexedTokens of the same length.
        Note that if relations is a list, then corpus1 and corpus2 must both be LocalTextSet.
        If relations is RDD, then corpus1 and corpus2 must both be DistributedTextSet.

        :return: TextSet.
        """
        if isinstance(relations, RDD):
            relations = relations.map(lambda x: x.to_tuple())
        elif isinstance(relations, list):
            relations = [relation.to_tuple() for relation in relations]
        else:
            raise TypeError("relations should be RDD or list of Relation")
        jvalue = callBigDlFunc(bigdl_type, "textSetFromRelationLists",
                               relations, corpus1, corpus2)
        return TextSet(jvalue=jvalue)
Example #55
    def init_from_existing_model(path,
                                 weight_path=None,
                                 input_seq_len=-1.0,
                                 hidden_drop=-1.0,
                                 attn_drop=-1.0,
                                 output_all_block=True,
                                 bigdl_type="float"):
        """
        Load an existing BERT model (with weights).

        # Arguments
        path: The path for the pre-defined model.
              Local file system, HDFS and Amazon S3 are supported.
              HDFS path should be like 'hdfs://[host]:[port]/xxx'.
              Amazon S3 path should be like 's3a://bucket/xxx'.
        weight_path: The path for pre-trained weights if any. Default is None.
        """
        jlayer = callBigDlFunc(bigdl_type, "loadBERT", path, weight_path,
                               input_seq_len, hidden_drop, attn_drop,
                               output_all_block)

        model = Layer(jvalue=jlayer, bigdl_type=bigdl_type)
        model.__class__ = BERT
        return model
Example #56
 def __init__(self, jvalue, bigdl_type, *args):
     self.value = jvalue if jvalue else callBigDlFunc(
         bigdl_type, JavaValue.jvm_class_constructor(self), *args)
     self.bigdl_type = bigdl_type
Example #57
def get_negative_samples(indexed):
    return callBigDlFunc("float", "getNegativeSamples",
                         indexed)
Example #58
 def unfreeze(self, names):
     callBigDlFunc(self.bigdl_type, "unFreeze", self.value, names)
Example #59
 def freeze_up_to(self, names):
     callBigDlFunc(self.bigdl_type, "freezeUpTo", self.value, names)
Example #60
 def new_graph(self, outputs):
     value = callBigDlFunc(self.bigdl_type, "newGraph", self.value, outputs)
     return self.from_jvalue(value)