Beispiel #1
0
def outlierBySd(X: OperationNode, max_iterations: int,
                **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'outlierBySd' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param max_iterations: Passed as named input 'max_iterations'.
    :param kwargs: Additional named inputs, merged after the two above.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, 'max_iterations': max_iterations, **kwargs}
    return Matrix(X.sds_context,
                  'outlierBySd',
                  named_input_nodes=named_inputs)
Beispiel #2
0
def vectorToCsv(mask: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'vectorToCsv' operation.

    :param mask: Node validated with `_check_matrix_op()` and passed as named input 'mask'.
    :return: `Matrix` created on the context of mask.
    """
    mask._check_matrix_op()
    return Matrix(mask.sds_context, 'vectorToCsv',
                  named_input_nodes={'mask': mask})
Beispiel #3
0
def discoverFD(X: OperationNode, Mask: OperationNode,
               threshold: float) -> Matrix:
    """
    Create a `Matrix` node for the 'discoverFD' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param Mask: Node validated with `_check_matrix_op()` and passed as named input 'Mask'.
    :param threshold: Passed as named input 'threshold'.
    :return: `Matrix` created on the context of X.
    """
    for node in (X, Mask):
        node._check_matrix_op()
    named_inputs = dict(X=X, Mask=Mask, threshold=threshold)
    return Matrix(X.sds_context,
                  'discoverFD',
                  named_input_nodes=named_inputs)
Beispiel #4
0
def kmeans(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Build the 'kmeans' operation for a matrix input.

    :param x: Input dataset to perform K-Means on.
    :param k: The number of centroids to use for the algorithm.
    :param runs: The number of concurrent instances of K-Means to run (with different initial centroids).
    :param max_iter: The maximum number of iterations to run the K-Means algorithm for.
    :param eps: Tolerance for the algorithm to declare convergence using WCSS change ratio.
    :param is_verbose: Boolean flag if the algorithm should be run in a verbose manner.
    :param avg_sample_size_per_centroid: The average number of records per centroid in the data samples.
    :return: `OperationNode` with list output of two results: 1. the clusters, 2. the cluster ID associated with each row in x.
    :raises ValueError: If ``x.shape[0]`` is 0, or if ``k`` is supplied and is below 1.
    """
    x._check_matrix_op()

    if x.shape[0] == 0:
        template = "Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
        raise ValueError(template.format(s=x.shape))

    # 'k' is optional; it is only validated when the caller supplies it.
    if 'k' in kwargs and kwargs['k'] < 1:
        raise ValueError(
            "Invalid number of clusters in K-Means, number must be integer above 0")

    return OperationNode(x.sds_context,
                         'kmeans',
                         named_input_nodes={'X': x, **kwargs},
                         output_type=OutputType.LIST,
                         number_of_outputs=2)
def pca(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Performs PCA on the matrix input.

    :param x: Input dataset to perform Principal Component Analysis (PCA) on.
    :param K: The number of reduced dimensions.
    :param center: Boolean specifying if the input values should be centered.
    :param scale: Boolean specifying if the input values should be scaled.
    :return: `OperationNode` List containing two outputs 1. The dimensionality reduced X input, 2. A matrix to reduce dimensionality similarly on unseen data.
    :raises ValueError: If ``x.shape[0]`` is 0, or if ``K`` is supplied and is below 1.
    """

    x._check_matrix_op()
    if x.shape[0] == 0:
        raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
                         .format(s=x.shape))

    if 'K' in kwargs and kwargs['K'] < 1:
        raise ValueError(
            "Invalid number of dimensions in PCA, number must be integer above 0")

    params_dict = {'X': x}
    params_dict.update(kwargs)

    # Python booleans for these flags are forwarded as the strings
    # "TRUE"/"FALSE"; any other value is passed through untouched.
    # Plain item assignment is used because dict has no `set` method.
    for flag in ('scale', 'center'):
        value = kwargs.get(flag)
        if isinstance(value, bool):
            params_dict[flag] = "TRUE" if value else "FALSE"

    return OperationNode(x.sds_context, 'pca', named_input_nodes=params_dict,
                         output_type=OutputType.LIST, number_of_outputs=2)
Beispiel #6
0
def bivar(X: OperationNode, S1: OperationNode, S2: OperationNode,
          T1: OperationNode, T2: OperationNode,
          verbose: bool) -> OperationNode:
    """
    Create the 'bivar' operation node.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param S1: Node validated with `_check_matrix_op()` and passed as named input 'S1'.
    :param S2: Node validated with `_check_matrix_op()` and passed as named input 'S2'.
    :param T1: Node validated with `_check_matrix_op()` and passed as named input 'T1'.
    :param T2: Node validated with `_check_matrix_op()` and passed as named input 'T2'.
    :param verbose: Print bivar stats
    :return: `OperationNode` with list output type holding four matrix outputs.
    """

    X._check_matrix_op()
    S1._check_matrix_op()
    S2._check_matrix_op()
    T1._check_matrix_op()
    T2._check_matrix_op()
    params_dict = {
        'X': X,
        'S1': S1,
        'S2': S2,
        'T1': T1,
        'T2': T2,
        'verbose': verbose
    }
    # The node is a multi-output OperationNode, not a Matrix, so the
    # return annotation reflects what is actually constructed here.
    return OperationNode(X.sds_context,
                         'bivar',
                         named_input_nodes=params_dict,
                         output_type=OutputType.LIST,
                         number_of_outputs=4,
                         output_types=[
                             OutputType.MATRIX, OutputType.MATRIX,
                             OutputType.MATRIX, OutputType.MATRIX
                         ])
Beispiel #7
0
def bandit(X_train: OperationNode, Y_train: OperationNode,
           X_val: OperationNode, Y_val: OperationNode, mask: OperationNode,
           schema: OperationNode, lp: OperationNode, primitives: OperationNode,
           param: OperationNode, isWeighted: bool,
           **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create the 'bandit' operation node.

    X_train, Y_train, X_val, Y_val and mask are validated with
    `_check_matrix_op()`; every positional argument is passed as a named
    input under its own parameter name, followed by any extra kwargs.

    :param kwargs: Additional named inputs merged after the positional ones.
    :return: `OperationNode` with list output type holding three outputs
        (frame, matrix, matrix).
    """
    for node in (X_train, Y_train, X_val, Y_val, mask):
        node._check_matrix_op()

    named_inputs = dict(X_train=X_train,
                        Y_train=Y_train,
                        X_val=X_val,
                        Y_val=Y_val,
                        mask=mask,
                        schema=schema,
                        lp=lp,
                        primitives=primitives,
                        param=param,
                        isWeighted=isWeighted)
    named_inputs.update(kwargs)

    result_types = [OutputType.FRAME, OutputType.MATRIX, OutputType.MATRIX]
    return OperationNode(X_train.sds_context,
                         'bandit',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.LIST,
                         number_of_outputs=3,
                         output_types=result_types)
Beispiel #8
0
def hyperband(X_train: OperationNode, y_train: OperationNode,
              X_val: OperationNode, y_val: OperationNode, params: Iterable,
              paramRanges: OperationNode,
              **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create the 'hyperband' operation node.

    X_train, y_train, X_val, y_val and paramRanges are validated with
    `_check_matrix_op()`; every positional argument is passed as a named
    input under its own parameter name, followed by any extra kwargs.

    :param params: Passed unchanged as named input 'params'.
    :param kwargs: Additional named inputs merged after the positional ones.
    :return: `OperationNode` with list output type holding two outputs
        (matrix, frame).
    """

    X_train._check_matrix_op()
    y_train._check_matrix_op()
    X_val._check_matrix_op()
    y_val._check_matrix_op()
    paramRanges._check_matrix_op()
    params_dict = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'params': params,
        'paramRanges': paramRanges
    }
    params_dict.update(kwargs)
    # The node is a multi-output OperationNode, not a Matrix, so the
    # return annotation reflects what is actually constructed here.
    return OperationNode(X_train.sds_context,
                         'hyperband',
                         named_input_nodes=params_dict,
                         output_type=OutputType.LIST,
                         number_of_outputs=2,
                         output_types=[OutputType.MATRIX, OutputType.FRAME])
Beispiel #9
0
def bandit(X_train: OperationNode, Y_train: OperationNode, metaList: Iterable,
           targetList: Iterable, lp: OperationNode, primitives: OperationNode,
           param: OperationNode,
           **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create the 'bandit' operation node.

    X_train and Y_train are validated with `_check_matrix_op()`; every
    positional argument is passed as a named input under its own parameter
    name, followed by any extra kwargs.

    :param kwargs: Additional named inputs merged after the positional ones.
    :return: `OperationNode` with list output type holding four outputs
        (frame, matrix, matrix, frame).
    """

    X_train._check_matrix_op()
    Y_train._check_matrix_op()
    params_dict = {
        'X_train': X_train,
        'Y_train': Y_train,
        'metaList': metaList,
        'targetList': targetList,
        'lp': lp,
        'primitives': primitives,
        'param': param
    }
    params_dict.update(kwargs)
    # The node is a multi-output OperationNode, not a Matrix, so the
    # return annotation reflects what is actually constructed here.
    return OperationNode(X_train.sds_context,
                         'bandit',
                         named_input_nodes=params_dict,
                         output_type=OutputType.LIST,
                         number_of_outputs=4,
                         output_types=[
                             OutputType.FRAME, OutputType.MATRIX,
                             OutputType.MATRIX, OutputType.FRAME
                         ])
Beispiel #10
0
def alsDS(X: OperationNode,
          **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create the 'alsDS' operation node.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param V: Location to read the input matrix V to be factorized
    :param L: Location to write the factor matrix L
    :param R: Location to write the factor matrix R
    :param rank: Rank of the factorization
    :param lambda: Regularization parameter, no regularization if 0.0
    :param maxi: Maximum number of iterations
    :param check: Check for convergence after every iteration, i.e., updating L and R once
    :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
        once the change in loss between two consecutive iterations falls below this threshold
        (wording reconstructed from a garbled original -- TODO confirm); if check is FALSE,
        thr is ignored
    :return: `OperationNode` with list output type holding two matrix outputs.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, **kwargs}
    result_types = [OutputType.MATRIX, OutputType.MATRIX]
    return OperationNode(X.sds_context, 'alsDS',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.LIST,
                         number_of_outputs=2,
                         output_types=result_types)
Beispiel #11
0
def components(G: OperationNode,
               **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'components' operation.

    :param G: Node validated with `_check_matrix_op()` and passed as named input 'G'.
    :param kwargs: Additional named inputs, merged after 'G'.
    :return: `Matrix` created on the context of G.
    """
    G._check_matrix_op()
    return Matrix(G.sds_context,
                  'components',
                  named_input_nodes={'G': G, **kwargs})
Beispiel #12
0
def vectorToCsv(mask: OperationNode) -> OperationNode:
    """
    Create the 'vectorToCsv' operation node with a string output type.

    :param mask: Node validated with `_check_matrix_op()` and passed as named input 'mask'.
    :return: `OperationNode` created on the context of mask.
    """
    mask._check_matrix_op()
    named_inputs = dict(mask=mask)
    return OperationNode(mask.sds_context, 'vectorToCsv',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.STRING)
Beispiel #13
0
def imputeByMedian(X: OperationNode, mask: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'imputeByMedian' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param mask: Node validated with `_check_matrix_op()` and passed as named input 'mask'.
    :return: `Matrix` created on the context of X.
    """
    for node in (X, mask):
        node._check_matrix_op()
    named_inputs = dict(X=X, mask=mask)
    return Matrix(X.sds_context, 'imputeByMedian',
                  named_input_nodes=named_inputs)
Beispiel #14
0
def outlier(X: OperationNode, opposite: bool) -> OperationNode:
    """
    Create the 'outlier' operation node with a matrix output type.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param opposite: Passed as named input 'opposite'.
    :return: `OperationNode` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X, opposite=opposite)
    return OperationNode(X.sds_context, 'outlier',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.MATRIX)
Beispiel #15
0
def msvm(X: OperationNode, Y: OperationNode,
         **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'msvm' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param Y: Node validated with `_check_matrix_op()` and passed as named input 'Y'.
    :param kwargs: Additional named inputs, merged after 'X' and 'Y'.
    :return: `Matrix` created on the context of X.
    """
    for node in (X, Y):
        node._check_matrix_op()
    named_inputs = {'X': X, 'Y': Y, **kwargs}
    return Matrix(X.sds_context,
                  'msvm',
                  named_input_nodes=named_inputs)
Beispiel #16
0
def vectorToCsv(vector: OperationNode) -> OperationNode:
    """
    Create the 'vectorToCsv' operation node with a string output type.

    :param vector: Node validated with `_check_matrix_op()` and passed as named input 'vector'.
    :return: `OperationNode` created on the context of vector.
    """
    vector._check_matrix_op()
    named_inputs = dict(vector=vector)
    return OperationNode(vector.sds_context, 'vectorToCsv',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.STRING)
Beispiel #17
0
def smote(X: OperationNode, mask: OperationNode,
          **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'smote' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param mask: Node validated with `_check_matrix_op()` and passed as named input 'mask'.
    :param kwargs: Additional named inputs, merged after 'X' and 'mask'.
    :return: `Matrix` created on the context of X.
    """
    for node in (X, mask):
        node._check_matrix_op()
    named_inputs = {'X': X, 'mask': mask, **kwargs}
    return Matrix(X.sds_context,
                  'smote',
                  named_input_nodes=named_inputs)
Beispiel #18
0
def getAccuracy(y: OperationNode, yhat: OperationNode,
                **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'getAccuracy' operation.

    :param y: Node validated with `_check_matrix_op()` and passed as named input 'y'.
    :param yhat: Node validated with `_check_matrix_op()` and passed as named input 'yhat'.
    :param kwargs: Additional named inputs, merged after 'y' and 'yhat'.
    :return: `Matrix` created on the context of y.
    """
    for node in (y, yhat):
        node._check_matrix_op()
    named_inputs = {'y': y, 'yhat': yhat, **kwargs}
    return Matrix(y.sds_context,
                  'getAccuracy',
                  named_input_nodes=named_inputs)
Beispiel #19
0
def img_mirror(img_in: OperationNode, horizontal_axis: bool) -> OperationNode:
    """
    Create the 'img_mirror' operation node with a matrix output type.

    :param img_in: Node validated with `_check_matrix_op()` and passed as named input 'img_in'.
    :param horizontal_axis: Passed as named input 'horizontal_axis'.
    :return: `OperationNode` created on the context of img_in.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in, horizontal_axis=horizontal_axis)
    return OperationNode(img_in.sds_context, 'img_mirror',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.MATRIX)
Beispiel #20
0
def xdummy1(X: OperationNode) -> OperationNode:
    """
    Create the 'xdummy1' operation node with a matrix output type.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :return: `OperationNode` created on the context of X.
    """
    X._check_matrix_op()
    return OperationNode(X.sds_context, 'xdummy1',
                         named_input_nodes=dict(X=X),
                         output_type=OutputType.MATRIX)
Beispiel #21
0
def lmCG(X: OperationNode, y: OperationNode,
         **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'lmCG' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param y: Node validated with `_check_matrix_op()` and passed as named input 'y'.
    :param kwargs: Additional named inputs, merged after 'X' and 'y'.
    :return: `Matrix` created on the context of X.
    """
    for node in (X, y):
        node._check_matrix_op()
    named_inputs = {'X': X, 'y': y, **kwargs}
    return Matrix(X.sds_context,
                  'lmCG',
                  named_input_nodes=named_inputs)
Beispiel #22
0
def knnbf(X: OperationNode, T: OperationNode, k_value: int) -> OperationNode:
    """
    Create the 'knnbf' operation node with a matrix output type.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param T: Node validated with `_check_matrix_op()` and passed as named input 'T'.
    :param k_value: Passed as named input 'k_value'.
    :return: `OperationNode` created on the context of X.
    """
    for node in (X, T):
        node._check_matrix_op()
    named_inputs = dict(X=X, T=T, k_value=k_value)
    return OperationNode(X.sds_context, 'knnbf',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.MATRIX)
Beispiel #23
0
def img_mirror(img_in: OperationNode, horizontal_axis: bool) -> Matrix:
    """
    Create a `Matrix` node for the 'img_mirror' operation.

    :param img_in: Node validated with `_check_matrix_op()` and passed as named input 'img_in'.
    :param horizontal_axis: Passed as named input 'horizontal_axis'.
    :return: `Matrix` created on the context of img_in.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in, horizontal_axis=horizontal_axis)
    return Matrix(img_in.sds_context,
                  'img_mirror',
                  named_input_nodes=named_inputs)


    
Beispiel #24
0
def imputeByMean(X: OperationNode, mask: OperationNode) -> OperationNode:
    """
    Create the 'imputeByMean' operation node with a matrix output type.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param mask: Node validated with `_check_matrix_op()` and passed as named input 'mask'.
    :return: `OperationNode` created on the context of X.
    """
    for node in (X, mask):
        node._check_matrix_op()
    named_inputs = dict(X=X, mask=mask)
    return OperationNode(X.sds_context, 'imputeByMean',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.MATRIX)
Beispiel #25
0
def outlierByArima(X: OperationNode,
                   **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'outlierByArima' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param kwargs: Additional named inputs, merged after 'X'.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, **kwargs}
    return Matrix(X.sds_context, 'outlierByArima',
                  named_input_nodes=named_inputs)
Beispiel #26
0
def intersect(X: OperationNode, Y: OperationNode) -> OperationNode:
    """
    Create the 'intersect' operation node with a matrix output type.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param Y: Node validated with `_check_matrix_op()` and passed as named input 'Y'.
    :return: `OperationNode` created on the context of X.
    """
    for node in (X, Y):
        node._check_matrix_op()
    named_inputs = dict(X=X, Y=Y)
    return OperationNode(X.sds_context, 'intersect',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.MATRIX)
Beispiel #27
0
def winsorize(X: OperationNode, verbose: bool) -> Matrix:
    """
    Create a `Matrix` node for the 'winsorize' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :param verbose: Passed as named input 'verbose'.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X, verbose=verbose)
    return Matrix(X.sds_context,
                  'winsorize',
                  named_input_nodes=named_inputs)


    
Beispiel #28
0
def dist(X: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'dist' operation.

    :param X: Node validated with `_check_matrix_op()` and passed as named input 'X'.
    :return: `Matrix` created on the context of X.
    """
    X._check_matrix_op()
    return Matrix(X.sds_context,
                  'dist',
                  named_input_nodes=dict(X=X))


    
Beispiel #29
0
def img_brightness(img_in: OperationNode, value: float, channel_max: int) -> Matrix:
    """
    Create a `Matrix` node for the 'img_brightness' operation.

    :param img_in: Node validated with `_check_matrix_op()` and passed as named input 'img_in'.
    :param value: Passed as named input 'value'.
    :param channel_max: Passed as named input 'channel_max'.
    :return: `Matrix` created on the context of img_in.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in, value=value, channel_max=channel_max)
    return Matrix(img_in.sds_context,
                  'img_brightness',
                  named_input_nodes=named_inputs)


    
Beispiel #30
0
def img_crop(img_in: OperationNode, w: int, h: int, x_offset: int, y_offset: int) -> OperationNode:
    """
    Create the 'img_crop' operation node with a matrix output type.

    :param img_in: Node validated with `_check_matrix_op()` and passed as named input 'img_in'.
    :param w: Passed as named input 'w'.
    :param h: Passed as named input 'h'.
    :param x_offset: Passed as named input 'x_offset'.
    :param y_offset: Passed as named input 'y_offset'.
    :return: `OperationNode` created on the context of img_in.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in,
                        w=w,
                        h=h,
                        x_offset=x_offset,
                        y_offset=y_offset)
    return OperationNode(img_in.sds_context,
                         'img_crop',
                         named_input_nodes=named_inputs,
                         output_type=OutputType.MATRIX)