Beispiel #1
0
def outlierBySd(X: OperationNode, max_iterations: int,
                **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'outlierBySd' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
    :param max_iterations: Forwarded as the named input 'max_iterations'.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, 'max_iterations': max_iterations, **kwargs}
    return Matrix(X.sds_context,
                  'outlierBySd',
                  named_input_nodes=named_inputs)
Beispiel #2
0
def vectorToCsv(mask: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'vectorToCsv' operation.

    :param mask: Input node; `mask._check_matrix_op()` is called on it first.
        It is forwarded as the named input 'mask'.
    :return: `Matrix` created on the context of mask.
    """
    mask._check_matrix_op()
    return Matrix(mask.sds_context, 'vectorToCsv',
                  named_input_nodes={'mask': mask})
Beispiel #3
0
def discoverFD(X: OperationNode, Mask: OperationNode,
               threshold: float) -> Matrix:
    """
    Create a `Matrix` node for the 'discoverFD' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param Mask: Second input node, checked with `_check_matrix_op()`.
    :param threshold: Forwarded as the named input 'threshold'.
    :return: `Matrix` created on the context of X.
    """
    # X is checked before Mask, so a failure on X is reported first.
    for node in (X, Mask):
        node._check_matrix_op()

    named_inputs = {'X': X, 'Mask': Mask, 'threshold': threshold}
    return Matrix(X.sds_context,
                  'discoverFD',
                  named_input_nodes=named_inputs)
Beispiel #4
0
def kmeans(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Build the K-Means operation for a matrix input.

    :param x: Input dataset to perform K-Means on.
    :param k: The number of centroids to use for the algorithm.
    :param runs: The number of concurrent instances of K-Means to run (with different initial centroids).
    :param max_iter: The maximum number of iterations to run the K-Means algorithm for.
    :param eps: Tolerance for the algorithm to declare convergence using WCSS change ratio.
    :param is_verbose: Boolean flag if the algorithm should be run in a verbose manner.
    :param avg_sample_size_per_centroid: The average number of records per centroid in the data samples.
    :return: `OperationNode` of list type with two outputs: 1. the clusters, 2. the cluster ID associated with each row in x.
    :raises ValueError: If `x.shape[0]` is 0, or if `k` is given and is below 1.
    """
    x._check_matrix_op()

    if x.shape[0] == 0:
        message = "Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
        raise ValueError(message.format(s=x.shape))

    # k is only validated when the caller supplied it.
    if 'k' in kwargs and kwargs['k'] < 1:
        raise ValueError(
            "Invalid number of clusters in K-Means, number must be integer above 0")

    # Entries of kwargs are merged last, exactly like dict.update().
    named_inputs = {'X': x, **kwargs}
    return OperationNode(
        x.sds_context,
        'kmeans',
        named_input_nodes=named_inputs,
        output_type=OutputType.LIST,
        number_of_outputs=2,
    )
def pca(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Performs PCA on the matrix input

    :param x: Input dataset to perform Principal Component Analysis (PCA) on.
    :param K: The number of reduced dimensions.
    :param center: Boolean specifying if the input values should be centered.
    :param scale: Boolean specifying if the input values should be scaled.
    :return: `OperationNode` List containing two outputs 1. The dimensionality reduced X input, 2. A matrix to reduce dimensionality similarly on unseen data.
    :raises ValueError: If `x.shape[0]` is 0, or if `K` is given and is below 1.
    """

    x._check_matrix_op()
    if x.shape[0] == 0:
        raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
                         .format(s=x.shape))

    if 'K' in kwargs and kwargs['K'] < 1:
        raise ValueError(
            "Invalid number of dimensions in PCA, number must be integer above 0")

    params_dict = {'X': x}
    params_dict.update(kwargs)

    # Python booleans for these flags are forwarded as the strings
    # "TRUE"/"FALSE". Plain item assignment is used because dict has no
    # `.set()` method, and the value (not the key) is what gets inspected.
    # Non-boolean values are forwarded untouched.
    for flag in ('scale', 'center'):
        value = params_dict.get(flag)
        if isinstance(value, bool):
            params_dict[flag] = "TRUE" if value else "FALSE"

    return OperationNode(x.sds_context, 'pca', named_input_nodes=params_dict,
                         output_type=OutputType.LIST, number_of_outputs=2)
Beispiel #6
0
 def read(self, path: os.PathLike,
          **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode':
     """Read a file from disk.

     Supported types include CSV, Matrix Market (coordinate), Text (i,j,v)
     and SystemDS Binary. See
     http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions
     for more details.

     The "data_type" entry of kwargs decides which node is built. The string
     arguments handled here are wrapped in double quotes inside kwargs
     before kwargs is forwarded.

     :param path: Location of the file; forwarded wrapped in double quotes.
     :param kwargs: Named arguments of the read operation. "data_type",
         "format" and "value_type" are inspected here.
     :return: an Operation Node, containing the read data. A `Frame` is
         returned when data_type is "frame".
     """
     data_type = kwargs.get("data_type", None)

     if data_type == "frame":
         file_format = kwargs.get("format", None)
         kwargs["data_type"] = f'"{data_type}"'
         if isinstance(file_format, str):
             kwargs["format"] = f'"{kwargs["format"]}"'
         return Frame(self, None, f'"{path}"', **kwargs)

     if data_type == "scalar":
         kwargs["data_type"] = f'"{data_type}"'
         value_type = kwargs.get("value_type", None)
         if value_type == "string":
             kwargs["value_type"] = f'"{kwargs["value_type"]}"'
             return OperationNode(self, "read", [f'"{path}"'],
                                  named_input_nodes=kwargs,
                                  shape=(-1, ),
                                  output_type=OutputType.SCALAR)
         # Scalars that are not strings continue to the generic node below,
         # with the already-quoted data_type.

     return OperationNode(self, "read", [f'"{path}"'],
                          named_input_nodes=kwargs,
                          shape=(-1, ))
Beispiel #7
0
def bandit(X_train: OperationNode, Y_train: OperationNode, metaList: Iterable,
           targetList: Iterable, lp: OperationNode, primitives: OperationNode,
           param: OperationNode,
           **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create the list-typed node for the 'bandit' operation.

    Only X_train and Y_train are checked with `_check_matrix_op()`; all
    other arguments are forwarded as given.

    :param X_train: Forwarded as the named input 'X_train'.
    :param Y_train: Forwarded as the named input 'Y_train'.
    :param metaList: Forwarded as the named input 'metaList'.
    :param targetList: Forwarded as the named input 'targetList'.
    :param lp: Forwarded as the named input 'lp'.
    :param primitives: Forwarded as the named input 'primitives'.
    :param param: Forwarded as the named input 'param'.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `OperationNode` of list type with four outputs, typed
        FRAME, MATRIX, MATRIX, FRAME in that order.
    """

    X_train._check_matrix_op()
    Y_train._check_matrix_op()
    params_dict = {
        'X_train': X_train,
        'Y_train': Y_train,
        'metaList': metaList,
        'targetList': targetList,
        'lp': lp,
        'primitives': primitives,
        'param': param
    }
    params_dict.update(kwargs)
    # The result is a multi-output list node, not a single Matrix, hence
    # the OperationNode return annotation.
    return OperationNode(X_train.sds_context,
                         'bandit',
                         named_input_nodes=params_dict,
                         output_type=OutputType.LIST,
                         number_of_outputs=4,
                         output_types=[
                             OutputType.FRAME, OutputType.MATRIX,
                             OutputType.MATRIX, OutputType.FRAME
                         ])
Beispiel #8
0
def alsDS(X: OperationNode,
          **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create the list-typed node for the 'alsDS' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
    :param rank: Rank of the factorization
    :param lambda: Regularization parameter, no regularization if 0.0
    :param maxi: Maximum number of iterations
    :param check: Check for convergence after every iteration, i.e., updating L and R once
    :param thr: Assuming check is set to TRUE, the algorithm stops and
        convergence is declared if the decrease in loss in any two
        consecutive iterations falls below this threshold; if check is
        FALSE thr is ignored. NOTE(review): wording reconstructed from a
        garbled description - confirm against the alsDS script.
    :return: `OperationNode` of list type with two MATRIX outputs.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, **kwargs}
    matrix_pair = [OutputType.MATRIX, OutputType.MATRIX]
    return OperationNode(
        X.sds_context,
        'alsDS',
        named_input_nodes=named_inputs,
        output_type=OutputType.LIST,
        number_of_outputs=2,
        output_types=matrix_pair,
    )
Beispiel #9
0
def components(G: OperationNode, **kwargs: Dict[str,
                                                VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'components' operation.

    :param G: Input node; `G._check_matrix_op()` is called on it first.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of G.
    """
    G._check_matrix_op()
    named_inputs = {'G': G, **kwargs}
    return Matrix(G.sds_context,
                  'components',
                  named_input_nodes=named_inputs)
Beispiel #10
0
def outlier(X: OperationNode, opposite: bool) -> OperationNode:
    """
    Create a matrix-typed node for the 'outlier' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
    :param opposite: Forwarded as the named input 'opposite'.
    :return: `OperationNode` with output type MATRIX.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, 'opposite': opposite}
    return OperationNode(
        X.sds_context,
        'outlier',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
Beispiel #11
0
def imputeByMedian(X: OperationNode, mask: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'imputeByMedian' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param mask: Second input node, checked with `_check_matrix_op()`.
    :return: `Matrix` created on the context of X.
    """
    # X is checked before mask, so a failure on X is reported first.
    for node in (X, mask):
        node._check_matrix_op()

    return Matrix(X.sds_context, 'imputeByMedian',
                  named_input_nodes={'X': X, 'mask': mask})
Beispiel #12
0
def vectorToCsv(vector: OperationNode) -> OperationNode:
    """
    Create a string-typed node for the 'vectorToCsv' operation.

    :param vector: Input node; `vector._check_matrix_op()` is called on it
        first. It is forwarded as the named input 'vector'.
    :return: `OperationNode` with output type STRING.
    """
    vector._check_matrix_op()
    named_inputs = {'vector': vector}
    return OperationNode(
        vector.sds_context,
        'vectorToCsv',
        named_input_nodes=named_inputs,
        output_type=OutputType.STRING,
    )
Beispiel #13
0
def getAccuracy(y: OperationNode, yhat: OperationNode,
                **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'getAccuracy' operation.

    :param y: First input node, checked with `_check_matrix_op()`.
    :param yhat: Second input node, checked with `_check_matrix_op()`.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of y.
    """
    # y is checked before yhat, so a failure on y is reported first.
    for node in (y, yhat):
        node._check_matrix_op()

    named_inputs = {'y': y, 'yhat': yhat, **kwargs}
    return Matrix(y.sds_context,
                  'getAccuracy',
                  named_input_nodes=named_inputs)
Beispiel #14
0
def vectorToCsv(mask: OperationNode) -> OperationNode:
    """
    Create a string-typed node for the 'vectorToCsv' operation.

    :param mask: Input node; `mask._check_matrix_op()` is called on it first.
        It is forwarded as the named input 'mask'.
    :return: `OperationNode` with output type STRING.
    """
    mask._check_matrix_op()
    context = mask.sds_context
    return OperationNode(context, 'vectorToCsv',
                         named_input_nodes={'mask': mask},
                         output_type=OutputType.STRING)
Beispiel #15
0
def lmCG(X: OperationNode, y: OperationNode,
         **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'lmCG' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param y: Second input node, checked with `_check_matrix_op()`.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of X.
    """
    # X is checked before y, so a failure on X is reported first.
    for node in (X, y):
        node._check_matrix_op()

    named_inputs = {'X': X, 'y': y, **kwargs}
    return Matrix(X.sds_context,
                  'lmCG',
                  named_input_nodes=named_inputs)
Beispiel #16
0
def img_mirror(img_in: OperationNode, horizontal_axis: bool) -> OperationNode:
    """
    Create a matrix-typed node for the 'img_mirror' operation.

    :param img_in: Input node; `img_in._check_matrix_op()` is called on it
        first.
    :param horizontal_axis: Forwarded as the named input 'horizontal_axis'.
    :return: `OperationNode` with output type MATRIX.
    """
    img_in._check_matrix_op()
    named_inputs = {
        'img_in': img_in,
        'horizontal_axis': horizontal_axis,
    }
    return OperationNode(
        img_in.sds_context,
        'img_mirror',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
Beispiel #17
0
def msvm(X: OperationNode, Y: OperationNode,
         **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'msvm' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param Y: Second input node, checked with `_check_matrix_op()`.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of X.
    """
    # X is checked before Y, so a failure on X is reported first.
    for node in (X, Y):
        node._check_matrix_op()

    named_inputs = {'X': X, 'Y': Y, **kwargs}
    return Matrix(X.sds_context,
                  'msvm',
                  named_input_nodes=named_inputs)
Beispiel #18
0
def smote(X: OperationNode, mask: OperationNode,
          **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'smote' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param mask: Second input node, checked with `_check_matrix_op()`.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of X.
    """
    # X is checked before mask, so a failure on X is reported first.
    for node in (X, mask):
        node._check_matrix_op()

    named_inputs = {'X': X, 'mask': mask, **kwargs}
    return Matrix(X.sds_context,
                  'smote',
                  named_input_nodes=named_inputs)
Beispiel #19
0
def xdummy1(X: OperationNode) -> OperationNode:
    """
    Create a matrix-typed node for the 'xdummy1' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
        It is forwarded as the named input 'X'.
    :return: `OperationNode` with output type MATRIX.
    """
    X._check_matrix_op()
    context = X.sds_context
    return OperationNode(context, 'xdummy1',
                         named_input_nodes={'X': X},
                         output_type=OutputType.MATRIX)
Beispiel #20
0
def outlierByArima(X: OperationNode,
                   **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'outlierByArima' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, **kwargs}
    return Matrix(X.sds_context, 'outlierByArima',
                  named_input_nodes=named_inputs)
Beispiel #21
0
def intersect(X: OperationNode, Y: OperationNode) -> OperationNode:
    """
    Create a matrix-typed node for the 'intersect' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param Y: Second input node, checked with `_check_matrix_op()`.
    :return: `OperationNode` with output type MATRIX.
    """
    # X is checked before Y, so a failure on X is reported first.
    for node in (X, Y):
        node._check_matrix_op()

    named_inputs = {'X': X, 'Y': Y}
    return OperationNode(
        X.sds_context,
        'intersect',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
Beispiel #22
0
def dist(X: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'dist' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
        It is forwarded as the named input 'X'.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = {'X': X}
    return Matrix(
        X.sds_context,
        'dist',
        named_input_nodes=named_inputs,
    )


    
Beispiel #23
0
def img_brightness(img_in: OperationNode, value: float, channel_max: int) -> Matrix:
    """
    Create a `Matrix` node for the 'img_brightness' operation.

    :param img_in: Input node; `img_in._check_matrix_op()` is called on it
        first.
    :param value: Forwarded as the named input 'value'.
    :param channel_max: Forwarded as the named input 'channel_max'.
    :return: `Matrix` created on the context of img_in.
    """
    img_in._check_matrix_op()
    named_inputs = {
        'img_in': img_in,
        'value': value,
        'channel_max': channel_max,
    }
    return Matrix(img_in.sds_context,
                  'img_brightness',
                  named_input_nodes=named_inputs)


    
Beispiel #24
0
def knnbf(X: OperationNode, T: OperationNode, k_value: int) -> OperationNode:
    """
    Create a matrix-typed node for the 'knnbf' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param T: Second input node, checked with `_check_matrix_op()`.
    :param k_value: Forwarded as the named input 'k_value'.
    :return: `OperationNode` with output type MATRIX.
    """
    # X is checked before T, so a failure on X is reported first.
    for node in (X, T):
        node._check_matrix_op()

    named_inputs = {'X': X, 'T': T, 'k_value': k_value}
    return OperationNode(
        X.sds_context,
        'knnbf',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
Beispiel #25
0
def winsorize(X: OperationNode, verbose: bool) -> Matrix:
    """
    Create a `Matrix` node for the 'winsorize' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
    :param verbose: Forwarded as the named input 'verbose'.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, 'verbose': verbose}
    return Matrix(
        X.sds_context,
        'winsorize',
        named_input_nodes=named_inputs,
    )


    
Beispiel #26
0
def winsorize(X: OperationNode, verbose: bool) -> OperationNode:
    """
    Create a matrix-typed node for the 'winsorize' operation.

    :param X: Input node; `X._check_matrix_op()` is called on it first.
    :param verbose: Forwarded as the named input 'verbose'.
    :return: `OperationNode` with output type MATRIX.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, 'verbose': verbose}
    return OperationNode(
        X.sds_context,
        'winsorize',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )


    
Beispiel #27
0
def img_crop(img_in: OperationNode, w: int, h: int, x_offset: int, y_offset: int) -> OperationNode:
    """
    Create a matrix-typed node for the 'img_crop' operation.

    :param img_in: Input node; `img_in._check_matrix_op()` is called on it
        first.
    :param w: Forwarded as the named input 'w'.
    :param h: Forwarded as the named input 'h'.
    :param x_offset: Forwarded as the named input 'x_offset'.
    :param y_offset: Forwarded as the named input 'y_offset'.
    :return: `OperationNode` with output type MATRIX.
    """
    img_in._check_matrix_op()
    named_inputs = {
        'img_in': img_in,
        'w': w,
        'h': h,
        'x_offset': x_offset,
        'y_offset': y_offset,
    }
    return OperationNode(
        img_in.sds_context,
        'img_crop',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )


    
Beispiel #28
0
def imputeByMean(X: OperationNode, mask: OperationNode) -> OperationNode:
    """
    Create a matrix-typed node for the 'imputeByMean' operation.

    :param X: First input node, checked with `_check_matrix_op()`.
    :param mask: Second input node, checked with `_check_matrix_op()`.
    :return: `OperationNode` with output type MATRIX.
    """
    # X is checked before mask, so a failure on X is reported first.
    for node in (X, mask):
        node._check_matrix_op()

    named_inputs = {'X': X, 'mask': mask}
    return OperationNode(
        X.sds_context,
        'imputeByMean',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
Beispiel #29
0
def img_mirror(img_in: OperationNode, horizontal_axis: bool) -> Matrix:
    """
    Create a `Matrix` node for the 'img_mirror' operation.

    :param img_in: Input node; `img_in._check_matrix_op()` is called on it
        first.
    :param horizontal_axis: Forwarded as the named input 'horizontal_axis'.
    :return: `Matrix` created on the context of img_in.
    """
    img_in._check_matrix_op()
    named_inputs = {
        'img_in': img_in,
        'horizontal_axis': horizontal_axis,
    }
    return Matrix(img_in.sds_context,
                  'img_mirror',
                  named_input_nodes=named_inputs)


    
Beispiel #30
0
def components(G: OperationNode,
               **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create a matrix-typed node for the 'components' operation.

    :param G: Input node; `G._check_matrix_op()` is called on it first.
    :param kwargs: Additional named inputs, forwarded unchanged.
    :return: `OperationNode` with output type MATRIX.
    """
    G._check_matrix_op()
    named_inputs = {'G': G, **kwargs}
    return OperationNode(
        G.sds_context,
        'components',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )