def _analyzer_jvm(self): """Returns the value of the computed distinctness :return self """ return self._deequAnalyzers.CountDistinct( to_scala_seq(self._jvm, self.columns))
def _analyzer_jvm(self): """Returns the value of the computed aggregation :return self """ return self._deequAnalyzers.ApproxQuantiles( self.column, to_scala_seq(self._jvm, self.quantiles), self.relativeError )
def _analyzer_jvm(self): """ Returns the unique value ratio in columns. :return self """ return self._deequAnalyzers.UniqueValueRatio( to_scala_seq(self._jvm, self.columns), self._jvm.scala.Option.apply(self.where) )
def _analyzer_jvm(self): """ Returns the mutual information of columns. :return self """ return self._deequAnalyzers.MutualInformation( to_scala_seq(self._jvm, self.columns), self._jvm.scala.Option.apply(self.where) )
def _analyzer_jvm(self): """ Returns the distinctness of the column(s) :return self: access the value of the distincness analyzer. """ return self._deequAnalyzers.Distinctness( to_scala_seq(self._jvm, self.columns), self._jvm.scala.Option.apply(self.where) )
def restrictToColumns(self, restrict_to_columns: list):
    """
    Restricts the profiling run to a subset of the columns.

    :param list restrict_to_columns: The columns to profile
    :return: self, restricted to the specified columns
    """
    self._ColumnProfilerRunBuilder.restrictToColumns(
        to_scala_seq(self._jvm, restrict_to_columns)
    )
    return self
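# A minimal usage sketch for restrictToColumns, assuming the same `spark` and
# `df` as above; ColumnProfilerRunner is PyDeequ's profiling entry point, and
# chaining restrictToColumns on the builder follows the method defined above:
from pydeequ.profiles import ColumnProfilerRunner

profiles = (ColumnProfilerRunner(spark)
            .onData(df)
            .restrictToColumns(["customer_id", "price"])
            .run())
for col, profile in profiles.profiles.items():
    print(col, profile)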
def areAnyComplete(self, columns, hint=None):
    """
    Creates a constraint that asserts completion in any of the combined set of columns.

    :param list[str] columns: Columns in the DataFrame to run the assertion on.
    :param str hint: A hint that states why a constraint could have failed.
    :return: self (Check object) with the areAnyComplete constraint added.
    """
    hint = self._jvm.scala.Option.apply(hint)
    columns_seq = to_scala_seq(self._jvm, columns)
    self._Check = self._Check.areAnyComplete(columns_seq, hint)
    return self
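# A minimal usage sketch for areAnyComplete, assuming `spark` and a `df` with
# illustrative `email` and `phone` columns; Check, CheckLevel,
# VerificationSuite, and VerificationResult are PyDeequ's standard
# verification entry points:
from pydeequ.checks import Check, CheckLevel
from pydeequ.verification import VerificationResult, VerificationSuite

check = Check(spark, CheckLevel.Error, "contactability check")
result = (VerificationSuite(spark)
          .onData(df)
          .addCheck(check.areAnyComplete(
              ["email", "phone"],
              hint="every row needs at least one contact field"))
          .run())
VerificationResult.checkResultsAsDataFrame(spark, result).show()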
def getSuccessMetricsAsJson(self, withTags: list = None):
    """
    Gets the AnalysisResult as JSON.

    :param withTags: List of tags to filter previous Metrics Repository runs with
    :return: the loaded metrics, parsed from JSON
    """
    self._check_RepositoryLoader()
    if not withTags:
        # Fall back to the Scala default argument: an empty tag sequence
        withTags = getattr(self.repository.load(), "getSuccessMetricsAsJson$default$1")()
    else:
        withTags = to_scala_seq(self._jvm, withTags)
    return json.loads(self.RepositoryLoader.getSuccessMetricsAsJson(withTags))
def getSuccessMetricsAsDataFrame(self, withTags: list = None, pandas: bool = False):
    """
    Gets the AnalysisResult as a DataFrame.

    :param withTags: List of tags to filter previous Metrics Repository runs with
    :param pandas: If True, return a pandas DataFrame instead of a Spark DataFrame
    :return: the loaded metrics as a Spark or pandas DataFrame
    """
    self._check_RepositoryLoader()
    if not withTags:
        # Fall back to the Scala default argument: an empty tag sequence
        withTags = getattr(self.repository.load(), "getSuccessMetricsAsDataFrame$default$2")()
    else:
        withTags = to_scala_seq(self._jvm, withTags)
    success = self.RepositoryLoader.getSuccessMetricsAsDataFrame(self._jspark_session, withTags)
    metrics_df = DataFrame(success, self._spark_session)
    return metrics_df.toPandas() if pandas else metrics_df
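# A minimal usage sketch for the two getters above, assuming metrics were
# previously persisted to a FileSystemMetricsRepository at a path
# `metrics_file` (repository and loader wiring follow PyDeequ's repository
# module):
from pydeequ.repository import FileSystemMetricsRepository

repository = FileSystemMetricsRepository(spark, metrics_file)
# All stored runs, as a Spark DataFrame:
repository.load().getSuccessMetricsAsDataFrame().show()
# Only runs carrying the "nightly" tag, as parsed JSON:
print(repository.load().getSuccessMetricsAsJson(withTags=["nightly"]))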
def forAnalyzers(self, analyzers: list):
    """
    Restricts loading to the metrics produced by the given analyzers.

    :param analyzers: List of analyzers whose resulting metrics you want to load
    :return: self, restricted to the given analyzers
    """
    analyzers_jvm = []
    for analyzer in analyzers:
        # Bind each Python analyzer to this gateway's JVM, then collect its
        # JVM counterpart (_analyzer_jvm is a property, so no call is needed)
        analyzer._set_jvm(self._jvm)
        analyzers_jvm.append(analyzer._analyzer_jvm)
    self.RepositoryLoader.forAnalyzers(to_scala_seq(self._jvm, analyzers_jvm))
    return self
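# A minimal usage sketch for forAnalyzers, reusing the `repository` from the
# previous example; since forAnalyzers returns self, it can be chained with
# the metric getters to load only the listed analyzers' metrics:
from pydeequ.analyzers import CountDistinct, Distinctness

loaded = (repository.load()
          .forAnalyzers([CountDistinct(["customer_id"]),
                         Distinctness(["customer_id", "order_id"])])
          .getSuccessMetricsAsDataFrame())
loaded.show()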
def haveAnyCompleteness(self, columns, assertion, hint=None):
    """
    Creates a constraint that asserts on completion in any of the combined set of columns.

    :param list[str] columns: Columns in the DataFrame to run the assertion on.
    :param lambda assertion: A function that accepts an int or float parameter.
    :param str hint: A hint that states why a constraint could have failed.
    :return: self (Check object) with the haveAnyCompleteness constraint added.
    """
    columns_seq = to_scala_seq(self._jvm, columns)
    assertion_func = ScalaFunction1(self._spark_session.sparkContext._gateway, assertion)
    hint = self._jvm.scala.Option.apply(hint)
    self._Check = self._Check.haveAnyCompleteness(columns_seq, assertion_func, hint)
    return self
def hasUniqueValueRatio(self, columns, assertion, hint=None):
    """
    Creates a constraint on the unique value ratio in a single or combined set of key columns.

    :param list[str] columns: Column(s) in the DataFrame to run the assertion on.
    :param lambda assertion: A function that accepts an int or float parameter.
    :param str hint: A hint that states why a constraint could have failed.
    :return: self (Check object) with the hasUniqueValueRatio constraint added.
    """
    assertion_func = ScalaFunction1(self._spark_session.sparkContext._gateway, assertion)
    hint = self._jvm.scala.Option.apply(hint)
    columns_seq = to_scala_seq(self._jvm, columns)
    self._Check = self._Check.hasUniqueValueRatio(columns_seq, assertion_func, hint)
    return self
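# A minimal sketch combining the two constraints above, reusing the earlier
# `spark`/`df` setup; each assertion lambda receives the computed metric as a
# float, and the column names are illustrative:
check = (Check(spark, CheckLevel.Warning, "integrity checks")
         .haveAnyCompleteness(["email", "phone"], lambda ratio: ratio >= 0.95,
                              hint="at least 95% of rows need a contact field")
         .hasUniqueValueRatio(["customer_id"], lambda ratio: ratio == 1.0,
                              hint="customer_id values must be unique"))
result = VerificationSuite(spark).onData(df).addCheck(check).run()
VerificationResult.checkResultsAsDataFrame(spark, result).show()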