def gridSearch(X: Matrix, y: Matrix, train: str, predict: str, params: List, paramValues: List, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Exhaustive grid search over hyper-parameter combinations.

    :param X: Input feature matrix
    :param y: Label matrix
    :param train: Name ft of the train function, called via ft(trainArgs)
    :param predict: Name fp of the loss function, called via fp((predictArgs, B))
    :param params: Hyper-parameter names (column vectors of values in 'paramValues')
    :param paramValues: Candidate values per hyper-parameter, by name
    :param cv: Flag enabling k-fold cross validation, otherwise training loss
    :param cvk: If cv=TRUE, the number of folds, otherwise ignored
    :param verbose: Flag for verbose debug output
    :return: 'OperationNode' with the best model returned as a column-major
        linearized column vector, plus a frame of evaluated configurations
    """
    # NOTE(review): the generated upstream doc also mentions numB (max number of
    # model parameters) and optional trainArgs/predictArgs lists — confirm against
    # the SystemDS gridSearch.dml builtin before relying on those kwargs.
    named_inputs = {'X': X, 'y': y, 'train': train, 'predict': predict,
                    'params': params, 'paramValues': paramValues, **kwargs}
    best_model = Matrix(X.sds_context, '')
    opt_configs = Frame(X.sds_context, '')
    outputs = [best_model, opt_configs]
    op = MultiReturn(X.sds_context, 'gridSearch', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def ppca(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Probabilistic principal component analysis.

    :param X: Input feature matrix
    :param k: Dimension of the new vector space constructed from eigen vectors
    :param maxi: Maximum number of iterations until convergence
    :param tolobj: Objective function tolerance value to stop the ppca algorithm
    :param tolrecerr: Reconstruction error tolerance value to stop the algorithm
    :param verbose: Verbose debug output
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'X': X, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    outputs = [out_a, out_b]
    op = MultiReturn(X.sds_context, 'ppca', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def abstain(X: Matrix, Y: Matrix, threshold: float, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Abstain from predictions below a confidence threshold.

    :param X: Input feature matrix
    :param Y: Label matrix
    :param threshold: Confidence threshold below which predictions are abstained
    :param verbose: Flag specifying if logging information should be printed
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'X': X, 'Y': Y, 'threshold': threshold, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    outputs = [out_a, out_b]
    op = MultiReturn(X.sds_context, 'abstain', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def gnmf(X: Matrix, rnk: int, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Gaussian non-negative matrix factorization.

    :param X: Input matrix to be factored
    :param rnk: Number of components into which matrix X is to be factored
    :param eps: Tolerance
    :param maxi: Maximum number of conjugate gradient iterations
    :return: 'OperationNode' with the two factor matrices
    """
    named_inputs = {'X': X, 'rnk': rnk, **kwargs}
    factor_a = Matrix(X.sds_context, '')
    factor_b = Matrix(X.sds_context, '')
    outputs = [factor_a, factor_b]
    op = MultiReturn(X.sds_context, 'gnmf', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def fixInvalidLengths(F1: Frame, mask: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Fix invalid lengths in a frame, guided by a mask matrix.

    :param F1: Input frame
    :param mask: Mask matrix indicating entries to fix
    :return: 'OperationNode' with the corrected frame and three result matrices
    """
    named_inputs = {'F1': F1, 'mask': mask, **kwargs}
    fixed_frame = Frame(F1.sds_context, '')
    out_a = Matrix(F1.sds_context, '')
    out_b = Matrix(F1.sds_context, '')
    out_c = Matrix(F1.sds_context, '')
    outputs = [fixed_frame, out_a, out_b, out_c]
    op = MultiReturn(F1.sds_context, 'fixInvalidLengths', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def gmmPredict(X: Matrix, weight: Matrix, mu: Matrix, precisions_cholesky: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Prediction with a fitted Gaussian mixture model.

    :param X: Matrix X (instances to be clustered)
    :param weight: Weight of learned model
    :param mu: Fitted cluster means
    :param precisions_cholesky: Fitted precision matrix for each mixture
    :param model: Fitted model
    :return: 'OperationNode' containing predicted cluster labels and
        probabilities of belongingness for new instances, given the variance
        and mean of fitted data
    """
    named_inputs = {'X': X, 'weight': weight, 'mu': mu,
                    'precisions_cholesky': precisions_cholesky, **kwargs}
    labels = Matrix(X.sds_context, '')
    probabilities = Matrix(X.sds_context, '')
    outputs = [labels, probabilities]
    op = MultiReturn(X.sds_context, 'gmmPredict', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def hospitalResidencyMatch(R: Matrix, H: Matrix, capacity: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Hospital-residency matching.

    :param R: Residents matrix R; an ORDERED preference matrix: resident 1
        (row 1) likes hospital 2 the most, followed by hospital 1, etc.
    :param H: Hospitals matrix H; an UNORDERED preference matrix: the row max
        marks the most-liked counterpart, higher values are better
    :param capacity: Capacity of hospitals, a [n*1] matrix with non-zero values
    :return: 'OperationNode' with two result matrices describing the matching;
        non-zero entries [i,j] indicate resident i matched with hospital j at
        the given preference level
    """
    named_inputs = {'R': R, 'H': H, 'capacity': capacity, **kwargs}
    match_a = Matrix(R.sds_context, '')
    match_b = Matrix(R.sds_context, '')
    outputs = [match_a, match_b]
    op = MultiReturn(R.sds_context, 'hospitalResidencyMatch', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def cvlm(X: Matrix, y: Matrix, k: int, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Cross-validated linear model training.

    :param X: Input feature matrix
    :param y: Label matrix
    :param k: Number of subsets needed; should always be more than 1 and less than nrow(X)
    :param icpt: Intercept presence, shifting and rescaling the columns of X
    :param reg: Regularization constant (lambda) for L2-regularization; set to nonzero to enable
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'X': X, 'y': y, 'k': k, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    outputs = [out_a, out_b]
    op = MultiReturn(X.sds_context, 'cvlm', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def multiLogRegPredict(X: Matrix, B: Matrix, Y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Prediction with a multinomial logistic regression model.

    NOTE(review): this file defines multiLogRegPredict twice; a later
    definition with the same signature shadows this one at import time.

    :param X: Data matrix X
    :param B: Regression parameters betas
    :param Y: Response vector Y
    :param verbose: Flag specifying if logging information should be printed
    :return: 'OperationNode' with two matrices and a scalar value of accuracy
    """
    named_inputs = {'X': X, 'B': B, 'Y': Y, **kwargs}
    means = Matrix(X.sds_context, '')
    predictions = Matrix(X.sds_context, '')
    accuracy = Scalar(X.sds_context, '')
    outputs = [means, predictions, accuracy]
    op = MultiReturn(X.sds_context, 'multiLogRegPredict', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def splitBalanced(X: Matrix, Y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Class-balanced train/test split of features and labels.

    :param X: Input feature matrix
    :param Y: Label matrix
    :param f: Train set fraction [0,1]
    :param verbose: Print available
    :return: 'OperationNode' with the four split matrices
    """
    named_inputs = {'X': X, 'Y': Y, **kwargs}
    x_train = Matrix(X.sds_context, '')
    x_test = Matrix(X.sds_context, '')
    y_train = Matrix(X.sds_context, '')
    y_test = Matrix(X.sds_context, '')
    outputs = [x_train, x_test, y_train, y_test]
    op = MultiReturn(X.sds_context, 'splitBalanced', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def pca(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Principal component analysis.

    NOTE(review): this file defines pca twice; a later definition with the
    same signature shadows this one at import time.

    :param X: Input feature matrix
    :param K: Number of reduced dimensions (i.e., columns)
    :param Center: Indicates whether or not to center the feature matrix
    :param Scale: Indicates whether or not to scale the feature matrix
    :return: 'OperationNode' containing output dominant eigen vectors (can be
        used for projections), the column means of the input (subtracted to
        construct the pca), and the scaling of the values to make each
        dimension the same size
    """
    named_inputs = {'X': X, **kwargs}
    eigen_vectors = Matrix(X.sds_context, '')
    projected = Matrix(X.sds_context, '')
    col_means = Matrix(X.sds_context, '')
    scaling = Matrix(X.sds_context, '')
    outputs = [eigen_vectors, projected, col_means, scaling]
    op = MultiReturn(X.sds_context, 'pca', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def winsorize(X: Matrix, verbose: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Winsorize (clip extreme values of) the input matrix.

    :param X: Input feature matrix
    :param verbose: To print output on screen
    :return: 'OperationNode' with the three result matrices
    """
    named_inputs = {'X': X, 'verbose': verbose, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    out_c = Matrix(X.sds_context, '')
    outputs = [out_a, out_b, out_c]
    op = MultiReturn(X.sds_context, 'winsorize', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def correctTypos(strings: Frame, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Correct typos in a frame of strings via frequency and edit distance.

    :param strings: Input frame of strings to correct
    :param frequency_threshold: Strings that occur above this frequency level will not be corrected
    :param distance_threshold: Max distance at which strings are considered similar
    :param is_verbose: Print debug information
    :return: 'OperationNode' with the corrected frame, two scalars, a distance
        matrix, and a dictionary frame
    """
    named_inputs = {'strings': strings, **kwargs}
    corrected = Frame(strings.sds_context, '')
    scalar_a = Scalar(strings.sds_context, '')
    scalar_b = Scalar(strings.sds_context, '')
    distances = Matrix(strings.sds_context, '')
    dictionary = Frame(strings.sds_context, '')
    outputs = [corrected, scalar_a, scalar_b, distances, dictionary]
    op = MultiReturn(strings.sds_context, 'correctTypos', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def split(X: Matrix, Y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Split features and labels into train and test sets.

    NOTE(review): this file defines split twice; a later definition with the
    same signature shadows this one at import time.

    :param X: Input feature matrix
    :param Y: Label matrix
    :return: 'OperationNode' with the four split matrices
    """
    named_inputs = {'X': X, 'Y': Y, **kwargs}
    x_train = Matrix(X.sds_context, '')
    x_test = Matrix(X.sds_context, '')
    y_train = Matrix(X.sds_context, '')
    y_test = Matrix(X.sds_context, '')
    outputs = [x_train, x_test, y_train, y_test]
    op = MultiReturn(X.sds_context, 'split', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def hyperband(X_train: Matrix, y_train: Matrix, X_val: Matrix, y_val: Matrix, params: List, paramRanges: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Hyperband hyper-parameter optimization.

    NOTE(review): this file defines hyperband twice; a later definition with a
    compatible signature shadows this one at import time.

    :param X_train: Training feature matrix
    :param y_train: Training label matrix
    :param X_val: Validation feature matrix
    :param y_val: Validation label matrix
    :param params: Hyper-parameter names
    :param paramRanges: One row per hyper-parameter; first column specifies
        min, second column max value
    :param verbose: If TRUE print messages are activated
    :return: 'OperationNode' with the best weights matrix and a frame of
        evaluated hyper-parameters
    """
    named_inputs = {'X_train': X_train, 'y_train': y_train, 'X_val': X_val,
                    'y_val': y_val, 'params': params, 'paramRanges': paramRanges,
                    **kwargs}
    best_weights = Matrix(X_train.sds_context, '')
    opt_params = Frame(X_train.sds_context, '')
    outputs = [best_weights, opt_params]
    op = MultiReturn(X_train.sds_context, 'hyperband', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def multiLogRegPredict(X: Matrix, B: Matrix, Y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Prediction with a multinomial logistic regression model.

    :param X: Data matrix X
    :param B: Regression parameters betas
    :param Y: Response vector Y
    :param verbose: Flag specifying if logging information should be printed
    :return: 'OperationNode' containing a matrix m of predicted
        means/probabilities, the predicted response vector, and a scalar value
        of accuracy
    """
    named_inputs = {'X': X, 'B': B, 'Y': Y, **kwargs}
    means = Matrix(X.sds_context, '')
    predictions = Matrix(X.sds_context, '')
    accuracy = Scalar(X.sds_context, '')
    outputs = [means, predictions, accuracy]
    op = MultiReturn(X.sds_context, 'multiLogRegPredict', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def csplineCG(X: Matrix, Y: Matrix, inp_x: float, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Cubic spline interpolation via conjugate gradient.

    :param X: Monotonically increasing x values; there are no duplicate points in X
    :param Y: Corresponding y values
    :param inp_x: The given input x, for which the cspline will find predicted y
    :param tol: Tolerance (epsilon); the conjugate gradient procedure
        terminates early if the L2 norm of the beta-residual is less than
        tolerance * its initial norm
    :param maxi: Maximum number of conjugate gradient iterations, 0 = no maximum
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'X': X, 'Y': Y, 'inp_x': inp_x, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    outputs = [out_a, out_b]
    op = MultiReturn(X.sds_context, 'csplineCG', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def steplm(X: Matrix, y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Stepwise linear regression.

    :param X: Input feature matrix
    :param y: Label matrix
    :param icpt: Intercept presence, shifting and rescaling the columns of X
    :param reg: Learning rate
    :param tol: Tolerance threshold to train until achieved
    :param maxi: Maximum iterations, 0 means until tolerance is reached
    :param verbose: If the algorithm should be verbose
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'X': X, 'y': y, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    outputs = [out_a, out_b]
    op = MultiReturn(X.sds_context, 'steplm', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def alsTopkPredict(userIDs: Matrix, I: Matrix, L: Matrix, R: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Top-K rating prediction from ALS factor matrices.

    NOTE(review): this file defines alsTopkPredict twice; a later definition
    with the same signature shadows this one at import time.

    :param userIDs: Column vector of user-ids (n x 1)
    :param I: Indicator matrix user-id x user-id to exclude from scoring
    :param L: The factor matrix L: user-id x feature-id
    :param R: The factor matrix R: feature-id x item-id
    :param K: The number of top-K items
    :return: 'OperationNode' containing a matrix of the users (rows) and a
        matrix containing the top-k predicted ratings for the specified users
        (rows)
    """
    named_inputs = {'userIDs': userIDs, 'I': I, 'L': L, 'R': R, **kwargs}
    top_items = Matrix(userIDs.sds_context, '')
    top_ratings = Matrix(userIDs.sds_context, '')
    outputs = [top_items, top_ratings]
    op = MultiReturn(userIDs.sds_context, 'alsTopkPredict', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def naiveBayes(D: Matrix, C: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Naive Bayes classifier training.

    :param D: Data matrix
    :param C: Class label matrix
    :param Laplace: Any Double value (Laplace smoothing)
    :param Verbose: Boolean value
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'D': D, 'C': C, **kwargs}
    out_a = Matrix(D.sds_context, '')
    out_b = Matrix(D.sds_context, '')
    outputs = [out_a, out_b]
    op = MultiReturn(D.sds_context, 'naiveBayes', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def als(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Alternating least squares matrix factorization.

    :param X: Location to read the input matrix X to be factorized
    :param rank: Rank of the factorization
    :param reg: Regularization scheme
    :param lambda: Regularization parameter, no regularization if 0.0
    :param maxi: Maximum number of iterations
    :param check: Check for convergence after every iteration, i.e., updating U and V once
    :param thr: Assuming check is set to TRUE, the algorithm stops and
        convergence is declared if the decrease in loss in any two consecutive
        iterations falls below this threshold; if check is FALSE thr is ignored
    :return: 'OperationNode' with the factor matrices, including the n-column matrix v
    """
    named_inputs = {'X': X, **kwargs}
    factor_u = Matrix(X.sds_context, '')
    factor_v = Matrix(X.sds_context, '')
    outputs = [factor_u, factor_v]
    op = MultiReturn(X.sds_context, 'als', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def split(X: Matrix, Y: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Split features and labels into train and test sets.

    :param X: Input feature matrix
    :param Y: Label matrix
    :param f: Train set fraction [0,1]
    :param cont: Continuous splits, otherwise sampled
    :param seed: The seed to randomly select rows in sampled mode
    :return: 'OperationNode' with the four split matrices
    """
    named_inputs = {'X': X, 'Y': Y, **kwargs}
    x_train = Matrix(X.sds_context, '')
    x_test = Matrix(X.sds_context, '')
    y_train = Matrix(X.sds_context, '')
    y_test = Matrix(X.sds_context, '')
    outputs = [x_train, x_test, y_train, y_test]
    op = MultiReturn(X.sds_context, 'split', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def scale(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Center and/or scale the feature matrix.

    :param X: Input feature matrix
    :param center: Indicates whether or not to center the feature matrix
    :param scale: Indicates whether or not to scale the feature matrix
    :return: 'OperationNode' with the three result matrices
    """
    named_inputs = {'X': X, **kwargs}
    scaled = Matrix(X.sds_context, '')
    centering = Matrix(X.sds_context, '')
    scaling = Matrix(X.sds_context, '')
    outputs = [scaled, centering, scaling]
    op = MultiReturn(X.sds_context, 'scale', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def pca(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Principal component analysis.

    :param X: Input feature matrix
    :param K: Number of reduced dimensions (i.e., columns)
    :param Center: Indicates whether or not to center the feature matrix
    :param Scale: Indicates whether or not to scale the feature matrix
    :return: 'OperationNode' with the four result matrices
    """
    named_inputs = {'X': X, **kwargs}
    eigen_vectors = Matrix(X.sds_context, '')
    projected = Matrix(X.sds_context, '')
    col_means = Matrix(X.sds_context, '')
    scaling = Matrix(X.sds_context, '')
    outputs = [eigen_vectors, projected, col_means, scaling]
    op = MultiReturn(X.sds_context, 'pca', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def alsTopkPredict(userIDs: Matrix, I: Matrix, L: Matrix, R: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Top-K rating prediction from ALS factor matrices.

    :param userIDs: Column vector of user-ids (n x 1)
    :param I: Indicator matrix user-id x user-id to exclude from scoring
    :param L: The factor matrix L: user-id x feature-id
    :param R: The factor matrix R: feature-id x item-id
    :param K: The number of top-K items
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'userIDs': userIDs, 'I': I, 'L': L, 'R': R, **kwargs}
    top_items = Matrix(userIDs.sds_context, '')
    top_ratings = Matrix(userIDs.sds_context, '')
    outputs = [top_items, top_ratings]
    op = MultiReturn(userIDs.sds_context, 'alsTopkPredict', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def tomeklink(X: Matrix, y: Matrix):
    """Remove Tomek links for under-sampling.

    :param X: Data matrix (nxm)
    :param y: Label matrix (nx1)
    :return: 'OperationNode' with the three result matrices
    """
    named_inputs = {'X': X, 'y': y}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    out_c = Matrix(X.sds_context, '')
    outputs = [out_a, out_b, out_c]
    op = MultiReturn(X.sds_context, 'tomeklink', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def slicefinder(X: Matrix, e: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Find the top-k data slices with the largest error contribution.

    :param X: Input feature matrix
    :param e: Error vector per row
    :param k: Number of subsets required
    :param maxL: Maximum level L (conjunctions of L predicates), 0 unlimited
    :param minSup: Minimum support (min number of rows per slice)
    :param alpha: Weight in [0,1]: 0 only size, 1 only error
    :param tpEval: Flag for task-parallel slice evaluation
    :param tpBlksz: Block size for task-parallel execution (num slices)
    :param selFeat: Flag for removing one-hot-encoded features that don't
        satisfy the minimum-support constraint and/or have zero error
    :param verbose: Flag for verbose debug output
    :return: 'OperationNode' with the three result matrices
    """
    named_inputs = {'X': X, 'e': e, **kwargs}
    out_a = Matrix(X.sds_context, '')
    out_b = Matrix(X.sds_context, '')
    out_c = Matrix(X.sds_context, '')
    outputs = [out_a, out_b, out_c]
    op = MultiReturn(X.sds_context, 'slicefinder', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def gaussianClassifier(D: Matrix, C: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Gaussian classifier training.

    :param D: Data matrix
    :param C: Class label matrix
    :param varSmoothing: Smoothing factor for variances
    :param verbose: Print accuracy of the training set
    :return: 'OperationNode' with two matrices, a list node, and a matrix
    """
    named_inputs = {'D': D, 'C': C, **kwargs}
    out_a = Matrix(D.sds_context, '')
    out_b = Matrix(D.sds_context, '')
    # NOTE: List here is the SystemDS list operation node, not typing.List.
    out_c = List(D.sds_context, '')
    out_d = Matrix(D.sds_context, '')
    outputs = [out_a, out_b, out_c, out_d]
    op = MultiReturn(D.sds_context, 'gaussianClassifier', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def kmeans(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """K-means clustering.

    :param X: Input feature matrix
    :param k: Number of centroids
    :param runs: Number of runs (with different initial centroids)
    :param max_iter: Maximum number of iterations per run
    :param eps: Tolerance (epsilon) for WCSS change ratio
    :param is_verbose: Whether to print per-iteration stats
    :param avg_sample_size_per_centroid: Average number of records per centroid in data samples
    :param seed: The seed used for initial sampling; if set to -1 a random seed is selected
    :return: 'OperationNode' with the two result matrices
    """
    named_inputs = {'X': X, **kwargs}
    centroids = Matrix(X.sds_context, '')
    assignments = Matrix(X.sds_context, '')
    outputs = [centroids, assignments]
    op = MultiReturn(X.sds_context, 'kmeans', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op
def hyperband(X_train: Matrix, y_train: Matrix, X_val: Matrix, y_val: Matrix, params: Iterable, paramRanges: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Hyperband hyper-parameter optimization.

    :param X_train: Training feature matrix
    :param y_train: Training label matrix
    :param X_val: Validation feature matrix
    :param y_val: Validation label matrix
    :param params: Hyper-parameter names
    :param paramRanges: One row per hyper-parameter with min/max value columns
    :return: 'OperationNode' with the best weights matrix and a frame of
        evaluated hyper-parameters
    """
    named_inputs = {'X_train': X_train, 'y_train': y_train, 'X_val': X_val,
                    'y_val': y_val, 'params': params, 'paramRanges': paramRanges,
                    **kwargs}
    best_weights = Matrix(X_train.sds_context, '')
    opt_params = Frame(X_train.sds_context, '')
    outputs = [best_weights, opt_params]
    op = MultiReturn(X_train.sds_context, 'hyperband', outputs, named_input_nodes=named_inputs)
    for node in outputs:
        node._unnamed_input_nodes = [op]
    return op