Esempio n. 1
0
    def sum(self, axis: int = None) -> 'OperationNode':
        """Build the node that sums this matrix, optionally along one axis.

        :param axis: 0 selects the 'colSums' operation, 1 selects 'rowSums',
            None (the default) selects the full 'sum'
        :return: a `Matrix` node when an axis is given, a `Scalar` node
            when axis is None
        :raises ValueError: if axis is anything other than 0, 1 or None
        """
        if axis == 0:
            return Matrix(self.sds_context, 'colSums', [self])
        if axis == 1:
            return Matrix(self.sds_context, 'rowSums', [self])
        if axis is None:
            return Scalar(self.sds_context, 'sum', [self])

        # Every supported axis value returned above; anything else is rejected.
        message = f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}"
        raise ValueError(message)
Esempio n. 2
0
    def read(self, path: os.PathLike,
             **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
        """Read a file from disk.

        Supported types include CSV, Matrix Market (coordinate),
        Text (i,j,v) and SystemDS Binary.
        See: http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions for more details

        If a metadata file named ``<path>.mtd`` exists, its ``data_type``
        entry replaces any ``data_type`` passed in ``kwargs``.

        :param path: location of the file, as a ``str`` or an ``os.PathLike``
            object such as ``pathlib.Path``
        :param kwargs: named arguments forwarded to the read operation; this
            method itself inspects ``data_type``, ``format`` and ``value_type``
        :return: an Operation Node, containing the read data. A `Matrix`,
            `Frame` or `Scalar` is returned when the data type is known,
            otherwise a plain `OperationNode` after printing a warning.
        """
        # PathLike objects do not support `+ ".mtd"`; normalise to the plain
        # file-system path first so both str and Path arguments work.
        path = os.fspath(path)

        mtd_filepath = path + ".mtd"
        if os.path.exists(mtd_filepath):
            with open(mtd_filepath) as jspec_file:
                mtd = json.load(jspec_file)
                kwargs["data_type"] = mtd["data_type"]

        data_type = kwargs.get("data_type", None)
        file_format = kwargs.get("format", None)
        if data_type == "matrix":
            # String arguments are wrapped in double quotes before being
            # handed to the node as named inputs.
            kwargs["data_type"] = f'"{data_type}"'
            return Matrix(self,
                          "read", [f'"{path}"'],
                          named_input_nodes=kwargs)
        elif data_type == "frame":
            kwargs["data_type"] = f'"{data_type}"'
            if isinstance(file_format, str):
                kwargs["format"] = f'"{kwargs["format"]}"'
            return Frame(self, "read", [f'"{path}"'], named_input_nodes=kwargs)
        elif data_type == "scalar":
            kwargs["data_type"] = f'"{data_type}"'
            output_type = OutputType.from_str(kwargs.get("value_type", None))
            kwargs["value_type"] = f'"{output_type.name}"'
            return Scalar(self,
                          "read", [f'"{path}"'],
                          named_input_nodes=kwargs,
                          output_type=output_type)

        print(
            "WARNING: Unknown type read please add a mtd file, or specify in arguments"
        )
        return OperationNode(self,
                             "read", [f'"{path}"'],
                             named_input_nodes=kwargs)
Esempio n. 3
0
def gmm(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the multi-return operation node for 'gmm'.

    :param X: input matrix; its context is used for every node created here
    :param n_components: Number of n_components in the Gaussian mixture model
    :param model: "VVV": unequal variance (full), each component has its own general covariance matrix
    :param init_param: initialize weights with "kmeans" or "random"
    :param iterations: Number of iterations
    :param reg_covar: regularization parameter for covariance matrix
    :param tol: tolerance value for convergence
    :return: 'OperationNode' containing the estimated parameters, the information criterion for the best iteration and the kth class
    """
    context = X.sds_context
    named_inputs = {'X': X, **kwargs}

    # Placeholder outputs, created in the order the operation returns them.
    output_types = (Matrix, Matrix, Scalar, Scalar, Matrix, Matrix, Matrix)
    outputs = [node_type(context, '') for node_type in output_types]

    multi_return = MultiReturn(context,
                               'gmm',
                               outputs,
                               named_input_nodes=named_inputs)

    # Each placeholder gets its own single-element input list pointing back
    # at the multi-return node.
    for placeholder in outputs:
        placeholder._unnamed_input_nodes = [multi_return]

    return multi_return
Esempio n. 4
0
def outlierByIQR(X: Matrix, k: float, max_iterations: int,
                 **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the multi-return operation node for 'outlierByIQR'.

    :param X: input matrix; its context is used for every node created here
    :param k: a constant used to discern outliers k*IQR
    :param isIterative: iterative repair or single repair
    :param repairMethod: values: 0 = delete rows having outliers
    :param max_iterations: values: 0 = arbitrary number of iterations until all outliers are removed
    :param verbose: flag specifying if logging information should be printed
    :return: 'OperationNode' containing meaning & matrix x with no outliers
    """
    context = X.sds_context
    named_inputs = {'X': X, 'k': k, 'max_iterations': max_iterations, **kwargs}

    # Placeholder outputs, created in the order the operation returns them.
    output_types = (Matrix, Matrix, Matrix, Matrix, Scalar, Scalar)
    outputs = [node_type(context, '') for node_type in output_types]

    multi_return = MultiReturn(context,
                               'outlierByIQR',
                               outputs,
                               named_input_nodes=named_inputs)

    # Each placeholder gets its own single-element input list pointing back
    # at the multi-return node.
    for placeholder in outputs:
        placeholder._unnamed_input_nodes = [multi_return]

    return multi_return
Esempio n. 5
0
def gmm(X: Matrix, verbose: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the multi-return operation node for 'gmm'.

    :param X: input matrix; its context is used for every node created here
    :param verbose: passed to the operation as the named input 'verbose'
    :param kwargs: further named inputs, merged on top of 'X' and 'verbose'
    :return: the `MultiReturn` node, wired to seven placeholder outputs
        (matrix, matrix, scalar, scalar, matrix, matrix, matrix)
    """
    context = X.sds_context
    named_inputs = {'X': X, 'verbose': verbose, **kwargs}

    # Placeholder outputs, created in the order the operation returns them.
    output_types = (Matrix, Matrix, Scalar, Scalar, Matrix, Matrix, Matrix)
    outputs = [node_type(context, '') for node_type in output_types]

    multi_return = MultiReturn(context,
                               'gmm',
                               outputs,
                               named_input_nodes=named_inputs)

    # Each placeholder gets its own single-element input list pointing back
    # at the multi-return node.
    for placeholder in outputs:
        placeholder._unnamed_input_nodes = [multi_return]

    return multi_return
Esempio n. 6
0
def outlierBySd(X: Matrix, max_iterations: int,
                **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the multi-return operation node for 'outlierBySd'.

    :param X: input matrix; its context is used for every node created here
    :param k: threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
    :param repairMethod: values: 0 = delete rows having outliers, 1 = replace outliers as zeros
    :param max_iterations: values: 0 = arbitrary number of iterations until all outliers are removed
    :return: the `MultiReturn` node, wired to five placeholder outputs
        (matrix, matrix, matrix, scalar, scalar)
    """
    context = X.sds_context
    named_inputs = {'X': X, 'max_iterations': max_iterations, **kwargs}

    # Placeholder outputs, created in the order the operation returns them.
    output_types = (Matrix, Matrix, Matrix, Scalar, Scalar)
    outputs = [node_type(context, '') for node_type in output_types]

    multi_return = MultiReturn(context,
                               'outlierBySd',
                               outputs,
                               named_input_nodes=named_inputs)

    # Each placeholder gets its own single-element input list pointing back
    # at the multi-return node.
    for placeholder in outputs:
        placeholder._unnamed_input_nodes = [multi_return]

    return multi_return
Esempio n. 7
0
 def nCol(self) -> 'Scalar':
     """Create a `Scalar` node applying the 'ncol' operation to this node.

     :return: `Scalar` node with this node as its only unnamed input
     """
     operands = [self]
     return Scalar(self.sds_context, 'ncol', operands)
Esempio n. 8
0
 def nRow(self) -> 'Scalar':
     """Create a `Scalar` node applying the 'nrow' operation to this node.

     :return: `Scalar` node with this node as its only unnamed input
     """
     operands = [self]
     return Scalar(self.sds_context, 'nrow', operands)
Esempio n. 9
0
 def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar':
     """Create the node that renders this node as a string.

     :param kwargs: named arguments handed on to the 'toString' operation
     :return: `Scalar` node whose output type is `OutputType.STRING`
     """
     return Scalar(
         self.sds_context,
         'toString',
         [self],
         kwargs,
         output_type=OutputType.STRING,
     )
Esempio n. 10
0
 def as_scalar(self) -> Scalar:
     """Wrap this node's entry of the source list in an 'as.scalar' node.

     The new node is also stored in the source list's `_outputs` under this
     node's key, replacing whatever was stored there.

     :return: the newly created `Scalar` node
     """
     source = self._list_source
     key = self._key
     scalar_node = Scalar(self.sds_context, "as.scalar", [source[key]])
     source._outputs[key] = scalar_node
     return scalar_node