def sum(self, axis: int = None) -> 'OperationNode':
    """Calculate sum of matrix.

    :param axis: can be 0 or 1 to do either column or row sums
    :return: `Matrix` representing operation
    """
    # No axis given: reduce the whole matrix to a single scalar.
    if axis is None:
        return Scalar(self.sds_context, 'sum', [self])
    if axis == 0:
        return Matrix(self.sds_context, 'colSums', [self])
    if axis == 1:
        return Matrix(self.sds_context, 'rowSums', [self])
    raise ValueError(
        f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}"
    )
def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """ Read a file from disk. Supported types include:
    CSV, Matrix Market (coordinate), Text (i,j,v), SystemDS Binary.
    See: http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions for more details

    :param path: path of the file to read.
    :return: an Operation Node, containing the read data.
    """
    # os.fspath() so genuine os.PathLike inputs (e.g. pathlib.Path) work;
    # plain `path + ".mtd"` raises TypeError for Path objects.
    mtd_filepath = os.fspath(path) + ".mtd"
    if os.path.exists(mtd_filepath):
        # A metadata (.mtd) file exists: take data_type from it so the
        # caller does not have to pass it explicitly.
        with open(mtd_filepath) as jspec_file:
            mtd = json.load(jspec_file)
            kwargs["data_type"] = mtd["data_type"]

    data_type = kwargs.get("data_type", None)
    file_format = kwargs.get("format", None)
    if data_type == "matrix":
        # Values are quoted because they are spliced into DML source text.
        kwargs["data_type"] = f'"{data_type}"'
        return Matrix(self, "read", [f'"{path}"'], named_input_nodes=kwargs)
    elif data_type == "frame":
        kwargs["data_type"] = f'"{data_type}"'
        if isinstance(file_format, str):
            kwargs["format"] = f'"{kwargs["format"]}"'
        return Frame(self, "read", [f'"{path}"'], named_input_nodes=kwargs)
    elif data_type == "scalar":
        kwargs["data_type"] = f'"{data_type}"'
        output_type = OutputType.from_str(kwargs.get("value_type", None))
        kwargs["value_type"] = f'"{output_type.name}"'
        return Scalar(self, "read", [f'"{path}"'],
                      named_input_nodes=kwargs, output_type=output_type)
    # Unknown type: fall back to a generic read node (best-effort, warn only).
    print(
        "WARNING: Unknown type read please add a mtd file, or specify in arguments"
    )
    return OperationNode(self, "read", [f'"{path}"'], named_input_nodes=kwargs)
def gmm(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    :param n_components: Number of n_components in the Gaussian mixture model
    :param model: "VVV": unequal variance (full), each component has its own general covariance matrix
    :param init_param: initialize weights with "kmeans" or "random"
    :param iterations: Number of iterations
    :param reg_covar: regularization parameter for covariance matrix
    :param tol: tolerance value for convergence
    :return: 'OperationNode' containing of estimated parameters & information criterion for best iteration & kth class
    """
    params_dict = {'X': X}
    params_dict.update(kwargs)

    sds = X.sds_context
    # Placeholder nodes for the seven outputs of the gmm builtin.
    output_nodes = [
        Matrix(sds, ''),
        Matrix(sds, ''),
        Scalar(sds, ''),
        Scalar(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
    ]
    op = MultiReturn(sds, 'gmm', output_nodes, named_input_nodes=params_dict)
    # Wire each output back to the multi-return op as its producer.
    for out_node in output_nodes:
        out_node._unnamed_input_nodes = [op]
    return op
def outlierByIQR(X: Matrix, k: float, max_iterations: int, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    :param k: a constant used to discern outliers k*IQR
    :param isIterative: iterative repair or single repair
    :param repairMethod: values: 0 = delete rows having outliers,
    :param max_iterations: values: 0 = arbitrary number of iteraition until all outliers are removed,
    :param verbose: flag specifying if logging information should be printed
    :return: 'OperationNode' containing meaning & matrix x with no outliers
    """
    params_dict = {'X': X, 'k': k, 'max_iterations': max_iterations}
    params_dict.update(kwargs)

    sds = X.sds_context
    # Placeholder nodes for the six outputs of the outlierByIQR builtin.
    output_nodes = [
        Matrix(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
        Scalar(sds, ''),
        Scalar(sds, ''),
    ]
    op = MultiReturn(sds, 'outlierByIQR', output_nodes,
                     named_input_nodes=params_dict)
    # Wire each output back to the multi-return op as its producer.
    for out_node in output_nodes:
        out_node._unnamed_input_nodes = [op]
    return op
def gmm(X: Matrix, verbose: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """Gaussian mixture model fit on X.

    :param X: input matrix
    :param verbose: flag specifying whether to print logging information
    :return: 'OperationNode' with the seven gmm outputs
    """
    # NOTE(review): a gmm without the `verbose` parameter also exists —
    # confirm these two definitions live in separate modules.
    params_dict = {'X': X, 'verbose': verbose}
    params_dict.update(kwargs)

    sds = X.sds_context
    # Placeholder nodes for the seven outputs of the gmm builtin.
    output_nodes = [
        Matrix(sds, ''),
        Matrix(sds, ''),
        Scalar(sds, ''),
        Scalar(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
    ]
    op = MultiReturn(sds, 'gmm', output_nodes, named_input_nodes=params_dict)
    # Wire each output back to the multi-return op as its producer.
    for out_node in output_nodes:
        out_node._unnamed_input_nodes = [op]
    return op
def outlierBySd(X: Matrix, max_iterations: int, **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    :param k: threshold values 1, 2, 3 for 68%, 95%, 99.7% respectively (3-sigma rule)
    :param repairMethod: values: 0 = delete rows having outliers, 1 = replace outliers as zeros
    :param max_iterations: values: 0 = arbitrary number of iteration until all outliers are removed,
    :return: 'OperationNode' containing
    """
    params_dict = {'X': X, 'max_iterations': max_iterations}
    params_dict.update(kwargs)

    sds = X.sds_context
    # Placeholder nodes for the five outputs of the outlierBySd builtin.
    output_nodes = [
        Matrix(sds, ''),
        Matrix(sds, ''),
        Matrix(sds, ''),
        Scalar(sds, ''),
        Scalar(sds, ''),
    ]
    op = MultiReturn(sds, 'outlierBySd', output_nodes,
                     named_input_nodes=params_dict)
    # Wire each output back to the multi-return op as its producer.
    for out_node in output_nodes:
        out_node._unnamed_input_nodes = [op]
    return op
def nCol(self) -> 'Scalar':
    """Build an operation node counting the columns of this node.

    :return: `Scalar` holding the column count.
    """
    return Scalar(self.sds_context, 'ncol', [self])
def nRow(self) -> 'Scalar':
    """Build an operation node counting the rows of this node.

    :return: `Scalar` holding the row count.
    """
    return Scalar(self.sds_context, 'nrow', [self])
def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar':
    """Converts the input to a string representation.

    :return: `Scalar` containing the string.
    """
    unnamed_inputs = [self]
    # kwargs is forwarded positionally, matching the original call shape.
    return Scalar(self.sds_context, 'toString', unnamed_inputs, kwargs,
                  output_type=OutputType.STRING)
def as_scalar(self) -> Scalar:
    """Wrap the list entry at this key in an ``as.scalar`` operation.

    :return: `Scalar` representing the entry.
    """
    entry = self._list_source[self._key]
    scalar_node = Scalar(self.sds_context, "as.scalar", [entry])
    # Register the result with the list source so it tracks this output.
    self._list_source._outputs[self._key] = scalar_node
    return scalar_node