Example #1
0
def gridSearch(X: Matrix,
               y: Matrix,
               train: str,
               predict: str,
               params: List,
               paramValues: List,
               **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'gridSearch' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param y: forwarded as named input 'y'
    :param train: Name ft of the train function to call via ft(trainArgs)
    :param predict: Name fp of the loss function to call via fp((predictArgs,B))
    :param params: forwarded as named input 'params' (presumably the names of the
        varied hyper-parameters; TODO confirm against the script)
    :param paramValues: forwarded as named input 'paramValues' (presumably column vectors
        of values aligned with 'params'; TODO confirm against the script)
    :param kwargs: optional named inputs added to the named inputs; known options:
        numB (maximum number of parameters in model B),
        cv (flag enabling k-fold cross validation, otherwise training loss),
        cvk (if cv=TRUE, the number of folds, otherwise ignored),
        verbose (flag for verbose debug output)
    :return: the ``MultiReturn`` node; its output nodes are a Matrix followed by a Frame
    """
    ctx = X.sds_context
    named_inputs = {
        'X': X,
        'y': y,
        'train': train,
        'predict': predict,
        'params': params,
        'paramValues': paramValues,
        **kwargs,
    }

    outputs = [Matrix(ctx, ''), Frame(ctx, '')]
    node = MultiReturn(ctx, 'gridSearch', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #2
0
def ppca(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'ppca' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        k (dimension of the new vector space constructed from eigen vectors),
        maxi (maximum number of iterations until convergence),
        tolobj (objective function tolerance value to stop the ppca algorithm),
        tolrecerr (reconstruction error tolerance value to stop the algorithm),
        verbose (verbose debug output)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'ppca', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #3
0
def abstain(X: Matrix, Y: Matrix, threshold: float,
            **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'abstain' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param Y: forwarded as named input 'Y'
    :param threshold: forwarded as named input 'threshold' (meaning not documented here)
    :param kwargs: optional named inputs added to the named inputs; known options:
        verbose (flag specifying if logging information should be printed)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'Y': Y, 'threshold': threshold, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'abstain', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #4
0
def gnmf(X: Matrix, rnk: int, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'gnmf' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param rnk: Number of components into which matrix X is to be factored
    :param kwargs: optional named inputs added to the named inputs; known options:
        eps (tolerance),
        maxi (maximum number of conjugate gradient iterations)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'rnk': rnk, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'gnmf', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #5
0
def fixInvalidLengths(F1: Frame, mask: Matrix,
                      **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'fixInvalidLengths' operation.

    :param F1: forwarded as named input 'F1'; its ``sds_context`` is reused for every node built here
    :param mask: forwarded as named input 'mask'
    :param kwargs: optional named inputs added to the named inputs
    :return: the ``MultiReturn`` node; its output nodes are a Frame followed by three
        Matrix placeholders
    """
    ctx = F1.sds_context
    named_inputs = {'F1': F1, 'mask': mask, **kwargs}

    outputs = [
        Frame(ctx, ''),
        Matrix(ctx, ''),
        Matrix(ctx, ''),
        Matrix(ctx, ''),
    ]
    node = MultiReturn(ctx,
                       'fixInvalidLengths',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #6
0
def gmmPredict(X: Matrix, weight: Matrix, mu: Matrix,
               precisions_cholesky: Matrix, **kwargs: Dict[str,
                                                           VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'gmmPredict' operation.

    :param X: Matrix X (instances to be clustered); its ``sds_context`` is reused
        for every node built here
    :param weight: Weight of learned model
    :param mu: fitted clusters mean
    :param precisions_cholesky: fitted precision matrix for each mixture
    :param kwargs: optional named inputs added to the named inputs; known options:
        model (fitted model)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
        (presumably the predicted cluster labels and the probabilities of
        belongingness for the new instances; TODO confirm the order against the script)
    """
    ctx = X.sds_context
    named_inputs = {
        'X': X,
        'weight': weight,
        'mu': mu,
        'precisions_cholesky': precisions_cholesky,
        **kwargs,
    }

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'gmmPredict', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
def hospitalResidencyMatch(R: Matrix, H: Matrix, capacity: Matrix,
                           **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'hospitalResidencyMatch' operation.

    :param R: Residents matrix R (described as an ORDERED matrix); its ``sds_context``
        is reused for every node built here
    :param H: Hospitals matrix H (described as an UNORDERED matrix)
    :param capacity: capacity of Hospitals matrix C (described as a [n*1] matrix
        with non zero values)
    :param kwargs: optional named inputs added to the named inputs
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
        (presumably the two result/matching matrices; TODO confirm against the script)
    """
    ctx = R.sds_context
    named_inputs = {'R': R, 'H': H, 'capacity': capacity, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx,
                       'hospitalResidencyMatch',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #8
0
def cvlm(X: Matrix, y: Matrix, k: int, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'cvlm' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param y: forwarded as named input 'y'
    :param k: Number of subsets needed; it should always be more than 1 and less than nrow(X)
    :param kwargs: optional named inputs added to the named inputs; known options:
        icpt (intercept presence, shifting and rescaling the columns of X),
        reg (regularization constant (lambda) for L2-regularization)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'y': y, 'k': k, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'cvlm', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #9
0
def multiLogRegPredict(X: Matrix, B: Matrix, Y: Matrix,
                       **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'multiLogRegPredict' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param B: forwarded as named input 'B'
    :param Y: forwarded as named input 'Y'
    :param kwargs: optional named inputs added to the named inputs; known options:
        verbose (flag specifying if logging information should be printed)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
        followed by a Scalar (presumably the accuracy value; TODO confirm)
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'B': B, 'Y': Y, **kwargs}

    outputs = [Matrix(ctx, ''), Matrix(ctx, ''), Scalar(ctx, '')]
    node = MultiReturn(ctx,
                       'multiLogRegPredict',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #10
0
def splitBalanced(X: Matrix,
                  Y: Matrix,
                  **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'splitBalanced' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param Y: forwarded as named input 'Y'
    :param kwargs: optional named inputs added to the named inputs; known options:
        f (train set fraction [0,1]),
        verbose (print available)
    :return: the ``MultiReturn`` node; its output nodes are four Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'Y': Y, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(4)]
    node = MultiReturn(ctx, 'splitBalanced', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #11
0
def pca(X: Matrix,
        **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'pca' operation.

    :param X: Input feature matrix; its ``sds_context`` is reused for every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        K (number of reduced dimensions, i.e., columns),
        Center (indicates whether or not to center the feature matrix),
        Scale (indicates whether or not to scale the feature matrix)
    :return: the ``MultiReturn`` node; its output nodes are four Matrix placeholders.
        NOTE(review): the documented results are the dominant eigen vectors (usable for
        projections), the column means subtracted to construct the pca, and the scaling
        factors -- only three descriptions for four outputs; confirm the mapping
        against the script.
    """
    ctx = X.sds_context
    named_inputs = {'X': X, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(4)]
    node = MultiReturn(ctx, 'pca', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #12
0
def winsorize(X: Matrix, verbose: bool, **kwargs: Dict[str,
                                                       VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'winsorize' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param verbose: To print output on screen
    :param kwargs: optional named inputs added to the named inputs
    :return: the ``MultiReturn`` node; its output nodes are three Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'verbose': verbose, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(3)]
    node = MultiReturn(ctx, 'winsorize', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #13
0
def correctTypos(strings: Frame, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'correctTypos' operation.

    :param strings: forwarded as named input 'strings'; its ``sds_context`` is reused
        for every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        frequency_threshold (strings that occur above this frequency level will not be corrected),
        distance_threshold (max distance at which strings are considered similar),
        is_verbose (print debug information)
    :return: the ``MultiReturn`` node; its output nodes are, in order, a Frame,
        two Scalars, a Matrix and a Frame
    """
    ctx = strings.sds_context
    named_inputs = {'strings': strings, **kwargs}

    outputs = [
        Frame(ctx, ''),
        Scalar(ctx, ''),
        Scalar(ctx, ''),
        Matrix(ctx, ''),
        Frame(ctx, ''),
    ]
    node = MultiReturn(ctx, 'correctTypos', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #14
0
def split(X: Matrix, Y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'split' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param Y: forwarded as named input 'Y'
    :param kwargs: optional named inputs added to the named inputs
    :return: the ``MultiReturn`` node; its output nodes are four Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'Y': Y, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(4)]
    node = MultiReturn(ctx, 'split', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #15
0
def hyperband(X_train: Matrix,
              y_train: Matrix,
              X_val: Matrix,
              y_val: Matrix,
              params: List,
              paramRanges: Matrix,
              **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'hyperband' operation.

    :param X_train: forwarded as named input 'X_train'; its ``sds_context`` is reused
        for every node built here
    :param y_train: forwarded as named input 'y_train'
    :param X_val: forwarded as named input 'X_val'
    :param y_val: forwarded as named input 'y_val'
    :param params: forwarded as named input 'params'
    :param paramRanges: forwarded as named input 'paramRanges' (presumably one row per
        hyper parameter, first column min and second column max value; TODO confirm)
    :param kwargs: optional named inputs added to the named inputs; known options:
        verbose (if TRUE print messages are activated)
    :return: the ``MultiReturn`` node; its output nodes are a Matrix followed by a Frame
    """
    ctx = X_train.sds_context
    named_inputs = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'params': params,
        'paramRanges': paramRanges,
        **kwargs,
    }

    outputs = [Matrix(ctx, ''), Frame(ctx, '')]
    node = MultiReturn(ctx, 'hyperband', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #16
0
def multiLogRegPredict(X: Matrix, B: Matrix, Y: Matrix,
                       **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'multiLogRegPredict' operation.

    :param X: Data Matrix X; its ``sds_context`` is reused for every node built here
    :param B: Regression parameters betas
    :param Y: Response vector Y
    :param kwargs: optional named inputs added to the named inputs; known options:
        verbose (not further documented)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
        followed by a Scalar (documented as: matrix of predicted means/probabilities,
        predicted response vector, scalar value of accuracy)
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'B': B, 'Y': Y, **kwargs}

    outputs = [Matrix(ctx, ''), Matrix(ctx, ''), Scalar(ctx, '')]
    node = MultiReturn(ctx,
                       'multiLogRegPredict',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #17
0
def csplineCG(X: Matrix,
              Y: Matrix,
              inp_x: float,
              **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'csplineCG' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node
        built here (presumably expected to be monotonically increasing without
        duplicate points; TODO confirm against the script)
    :param Y: forwarded as named input 'Y'
    :param inp_x: the given input x, for which the cspline will find predicted y
    :param kwargs: optional named inputs added to the named inputs; known options:
        tol (tolerance (epsilon); the conjugate gradient procedure terminates early if
        the L2 norm of the beta-residual is less than tolerance * its initial norm),
        maxi (maximum number of conjugate gradient iterations, 0 = no maximum)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'Y': Y, 'inp_x': inp_x, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'csplineCG', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #18
0
def steplm(X: Matrix, y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'steplm' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param y: forwarded as named input 'y'
    :param kwargs: optional named inputs added to the named inputs; known options:
        icpt (intercept presence, shifting and rescaling the columns of X),
        reg (learning rate),
        tol (tolerance threshold to train until achieved),
        maxi (maximum iterations, 0 means until tolerance is reached),
        verbose (if the algorithm should be verbose)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'y': y, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'steplm', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #19
0
def alsTopkPredict(userIDs: Matrix, I: Matrix, L: Matrix, R: Matrix,
                   **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'alsTopkPredict' operation.

    :param userIDs: Column vector of user-ids (n x 1); its ``sds_context`` is reused
        for every node built here
    :param I: Indicator matrix user-id x user-id to exclude from scoring
    :param L: The factor matrix L: user-id x feature-id
    :param R: The factor matrix R: feature-id x item-id
    :param kwargs: optional named inputs added to the named inputs; known options:
        K (the number of top-K items)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
        (one is documented as the top-k predicted ratings for the specified users;
        the other is presumably the matching top-k items -- TODO confirm)
    """
    ctx = userIDs.sds_context
    named_inputs = {'userIDs': userIDs, 'I': I, 'L': L, 'R': R, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx,
                       'alsTopkPredict',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #20
0
def naiveBayes(D: Matrix, C: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'naiveBayes' operation.

    :param D: forwarded as named input 'D'; its ``sds_context`` is reused for every node built here
    :param C: forwarded as named input 'C'
    :param kwargs: optional named inputs added to the named inputs; known options
        (exact spelling/casing of the names should be confirmed against the script):
        Laplace (any Double value),
        Verbose (Boolean value)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = D.sds_context
    named_inputs = {'D': D, 'C': C, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'naiveBayes', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #21
0
def als(X: Matrix,
        **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'als' operation.

    :param X: the input matrix X to be factorized; its ``sds_context`` is reused for
        every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        rank (rank of the factorization),
        reg (regularization type),
        lambda (regularization parameter, no regularization if 0.0),
        maxi (maximum number of iterations),
        check (check for convergence after every iteration, i.e., updating U and V once),
        thr (assuming check is set to TRUE, the algorithm stops and convergence is
        declared if the decrease in loss in any two consecutive iterations falls below
        this threshold; if check is FALSE thr is ignored)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
        (presumably the two factor matrices; TODO confirm against the script)
    """
    ctx = X.sds_context
    named_inputs = {'X': X, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'als', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #22
0
def split(X: Matrix,
          Y: Matrix,
          **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'split' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param Y: forwarded as named input 'Y'
    :param kwargs: optional named inputs added to the named inputs; known options:
        f (train set fraction [0,1]),
        cont (continuous splits, otherwise sampled),
        seed (the seed to randomly select rows in sampled mode)
    :return: the ``MultiReturn`` node; its output nodes are four Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'Y': Y, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(4)]
    node = MultiReturn(ctx, 'split', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #23
0
def scale(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'scale' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        center (indicates whether or not to center the feature matrix),
        scale (indicates whether or not to scale the feature matrix)
    :return: the ``MultiReturn`` node; its output nodes are three Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(3)]
    node = MultiReturn(ctx, 'scale', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #24
0
def pca(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'pca' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        K (number of reduced dimensions, i.e., columns),
        Center (indicates whether or not to center the feature matrix),
        Scale (indicates whether or not to scale the feature matrix)
    :return: the ``MultiReturn`` node; its output nodes are four Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(4)]
    node = MultiReturn(ctx, 'pca', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #25
0
def alsTopkPredict(userIDs: Matrix, I: Matrix, L: Matrix, R: Matrix,
                   **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'alsTopkPredict' operation.

    :param userIDs: forwarded as named input 'userIDs'; its ``sds_context`` is reused
        for every node built here
    :param I: forwarded as named input 'I'
    :param L: forwarded as named input 'L'
    :param R: forwarded as named input 'R'
    :param kwargs: optional named inputs added to the named inputs; known options:
        K (the number of top-K items)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = userIDs.sds_context
    named_inputs = {'userIDs': userIDs, 'I': I, 'L': L, 'R': R, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx,
                       'alsTopkPredict',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #26
0
def tomeklink(X: Matrix, y: Matrix):
    """
    Create the ``MultiReturn`` operation node for the 'tomeklink' operation.

    Unlike its sibling wrappers, this one accepts no extra keyword inputs.

    :param X: Data Matrix (nxm); its ``sds_context`` is reused for every node built here
    :param y: Label Matrix (nx1)
    :return: the ``MultiReturn`` node; its output nodes are three Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'y': y}

    outputs = [Matrix(ctx, '') for _ in range(3)]
    node = MultiReturn(ctx, 'tomeklink', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #27
0
def slicefinder(X: Matrix, e: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'slicefinder' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param e: forwarded as named input 'e'
    :param kwargs: optional named inputs added to the named inputs; known options:
        k (number of subsets required),
        maxL (level L (conjunctions of L predicates), 0 unlimited),
        minSup (support (min number of rows per slice)),
        alpha ([0,1]: 0 only size, 1 only error),
        tpEval (for task-parallel slice evaluation),
        tpBlksz (size for task-parallel execution (num slices)),
        selFeat (for removing one-hot-encoded features that don't satisfy the
        constraint and/or have zero error),
        verbose (for verbose debug output)
    :return: the ``MultiReturn`` node; its output nodes are three Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, 'e': e, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(3)]
    node = MultiReturn(ctx, 'slicefinder', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #28
0
def gaussianClassifier(D: Matrix, C: Matrix,
                       **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'gaussianClassifier' operation.

    :param D: forwarded as named input 'D'; its ``sds_context`` is reused for every node built here
    :param C: forwarded as named input 'C'
    :param kwargs: optional named inputs added to the named inputs; known options:
        varSmoothing (smoothing factor for variances),
        verbose (print accuracy of the training set)
    :return: the ``MultiReturn`` node; its output nodes are, in order, two Matrix
        placeholders, a List and a Matrix
    """
    ctx = D.sds_context
    named_inputs = {'D': D, 'C': C, **kwargs}

    outputs = [
        Matrix(ctx, ''),
        Matrix(ctx, ''),
        List(ctx, ''),
        Matrix(ctx, ''),
    ]
    node = MultiReturn(ctx,
                       'gaussianClassifier',
                       outputs,
                       named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #29
0
def kmeans(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'kmeans' operation.

    :param X: forwarded as named input 'X'; its ``sds_context`` is reused for every node built here
    :param kwargs: optional named inputs added to the named inputs; known options:
        k (number of centroids),
        runs (number of runs, with different initial centroids),
        max_iter (maximum number of iterations per run),
        eps (tolerance (epsilon) for WCSS change ratio),
        is_verbose (controls printing of per-iteration stats; exact polarity should be
        confirmed against the script),
        avg_sample_size_per_centroid (average number of records per centroid in data samples),
        seed (the seed used for initial sampling; presumably -1 selects a random
        seed -- TODO confirm)
    :return: the ``MultiReturn`` node; its output nodes are two Matrix placeholders
    """
    ctx = X.sds_context
    named_inputs = {'X': X, **kwargs}

    outputs = [Matrix(ctx, '') for _ in range(2)]
    node = MultiReturn(ctx, 'kmeans', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node
Example #30
0
def hyperband(X_train: Matrix, y_train: Matrix, X_val: Matrix, y_val: Matrix,
              params: Iterable, paramRanges: Matrix,
              **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the ``MultiReturn`` operation node for the 'hyperband' operation.

    :param X_train: forwarded as named input 'X_train'; its ``sds_context`` is reused
        for every node built here
    :param y_train: forwarded as named input 'y_train'
    :param X_val: forwarded as named input 'X_val'
    :param y_val: forwarded as named input 'y_val'
    :param params: forwarded as named input 'params'
    :param paramRanges: forwarded as named input 'paramRanges'
    :param kwargs: optional named inputs added to the named inputs
    :return: the ``MultiReturn`` node; its output nodes are a Matrix followed by a Frame
    """
    ctx = X_train.sds_context
    named_inputs = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'params': params,
        'paramRanges': paramRanges,
        **kwargs,
    }

    outputs = [Matrix(ctx, ''), Frame(ctx, '')]
    node = MultiReturn(ctx, 'hyperband', outputs, named_input_nodes=named_inputs)

    # Every output placeholder takes the multi-return node as its only unnamed input.
    for out in outputs:
        out._unnamed_input_nodes = [node]

    return node