Ejemplo n.º 1
0
    def _analyzer_jvm(self):
        """Build the JVM-side CountDistinct analyzer for this instance.

        :return: a Deequ ``CountDistinct`` JVM analyzer over ``self.columns``
        """
        scala_columns = to_scala_seq(self._jvm, self.columns)
        return self._deequAnalyzers.CountDistinct(scala_columns)
Ejemplo n.º 2
0
    def _analyzer_jvm(self):
        """Build the JVM-side ApproxQuantiles analyzer for this instance.

        :return: a Deequ ``ApproxQuantiles`` JVM analyzer for ``self.column``
            at the requested ``self.quantiles`` and ``self.relativeError``
        """
        scala_quantiles = to_scala_seq(self._jvm, self.quantiles)
        return self._deequAnalyzers.ApproxQuantiles(
            self.column, scala_quantiles, self.relativeError)
Ejemplo n.º 3
0
    def _analyzer_jvm(self):
        """
        Build the JVM-side UniqueValueRatio analyzer for this instance.

        :return: a Deequ ``UniqueValueRatio`` JVM analyzer over
            ``self.columns``, optionally filtered by ``self.where``
        """
        scala_columns = to_scala_seq(self._jvm, self.columns)
        # Wrap the (possibly None) filter predicate as a scala.Option.
        where_option = self._jvm.scala.Option.apply(self.where)
        return self._deequAnalyzers.UniqueValueRatio(scala_columns, where_option)
Ejemplo n.º 4
0
    def _analyzer_jvm(self):
        """
        Build the JVM-side MutualInformation analyzer for this instance.

        :return: a Deequ ``MutualInformation`` JVM analyzer over
            ``self.columns``, optionally filtered by ``self.where``
        """
        scala_columns = to_scala_seq(self._jvm, self.columns)
        # Wrap the (possibly None) filter predicate as a scala.Option.
        where_option = self._jvm.scala.Option.apply(self.where)
        return self._deequAnalyzers.MutualInformation(scala_columns, where_option)
Ejemplo n.º 5
0
    def _analyzer_jvm(self):
        """
        Build the JVM-side Distinctness analyzer for this instance.

        :return: a Deequ ``Distinctness`` JVM analyzer over ``self.columns``,
            optionally filtered by ``self.where``
        """
        scala_columns = to_scala_seq(self._jvm, self.columns)
        # Wrap the (possibly None) filter predicate as a scala.Option.
        where_option = self._jvm.scala.Option.apply(self.where)
        return self._deequAnalyzers.Distinctness(scala_columns, where_option)
Ejemplo n.º 6
0
    def restrictToColumns(self, restrict_to_columns: list):
        """
        Restrict profiling to a specified subset of columns.

        :param list restrict_to_columns: names of the columns to profile
        :return: self, for fluent chaining
        """
        scala_columns = to_scala_seq(self._jvm, restrict_to_columns)
        self._ColumnProfilerRunBuilder.restrictToColumns(scala_columns)
        return self
Ejemplo n.º 7
0
    def areAnyComplete(self, columns, hint=None):
        """Add a constraint asserting any completion in the combined set of columns.

        :param list[str] columns: columns in the DataFrame to run the assertion on
        :param str hint: a hint that states why a constraint could have failed
        :return: self, with the underlying Check.scala object updated
        """
        scala_columns = to_scala_seq(self._jvm, columns)
        # py4j needs the optional hint wrapped as a scala.Option.
        scala_hint = self._jvm.scala.Option.apply(hint)
        self._Check = self._Check.areAnyComplete(scala_columns, scala_hint)
        return self
Ejemplo n.º 8
0
 def getSuccessMetricsAsJson(self, withTags: list = None):
     """
     Fetch the stored AnalysisResult as parsed JSON.

     :param withTags: optional list of tags used to filter previous
         Metrics Repository runs; when omitted or empty, no filter is applied
     :return: the success metrics, deserialized from the JSON produced by Deequ
     """
     self._check_RepositoryLoader()
     if withTags:
         tags = to_scala_seq(self._jvm, withTags)
     else:
         # Use the Scala default argument: an empty tag sequence (no filtering).
         tags = getattr(self.repository.load(), "getSuccessMetricsAsJson$default$1")()
     return json.loads(self.RepositoryLoader.getSuccessMetricsAsJson(tags))
Ejemplo n.º 9
0
 def getSuccessMetricsAsDataFrame(self, withTags: list = None, pandas: bool = False):
     """
     Fetch the stored AnalysisResult as a DataFrame.

     :param withTags: optional list of tags used to filter previous
         Metrics Repository runs; when omitted or empty, no filter is applied
     :param bool pandas: when True, convert the result to a pandas DataFrame
     :return: a Spark DataFrame, or a pandas DataFrame if ``pandas`` is True
     """
     self._check_RepositoryLoader()
     if withTags:
         tags = to_scala_seq(self._jvm, withTags)
     else:
         # Use the Scala default argument: an empty tag sequence (no filtering).
         tags = getattr(self.repository.load(), "getSuccessMetricsAsDataFrame$default$2")()
     jdf = self.RepositoryLoader.getSuccessMetricsAsDataFrame(self._jspark_session, tags)
     result = DataFrame(jdf, self._spark_session)
     return result.toPandas() if pandas else result
Ejemplo n.º 10
0
    def forAnalyzers(self, analyzers: list):
        """
        Select the analyzers whose stored metrics should be loaded.

        :param analyzers: list of analyzer objects whose resulting metrics
            you want to load
        :return: self, for fluent chaining
        """
        def _as_jvm(analyzer):
            # Bind our JVM gateway to the analyzer before bridging it.
            analyzer._set_jvm(self._jvm)
            # NOTE(review): accessed without parentheses — assumed to be a
            # @property on the analyzer classes; confirm it is not a plain
            # method, which would collect a bound-method object here.
            return analyzer._analyzer_jvm

        jvm_analyzers = [_as_jvm(a) for a in analyzers]
        self.RepositoryLoader.forAnalyzers(to_scala_seq(self._jvm, jvm_analyzers))
        return self
Ejemplo n.º 11
0
    def haveAnyCompleteness(self, columns, assertion, hint=None):
        """Add a constraint asserting on any completion in the combined set of columns.

        :param list[str] columns: columns in the DataFrame to run the assertion on
        :param lambda assertion: a function that accepts an int or float parameter
        :param str hint: a hint that states why a constraint could have failed
        :return: self, with the underlying Check.scala object updated
        """
        # Bridge each Python argument to its Scala counterpart.
        scala_hint = self._jvm.scala.Option.apply(hint)
        scala_columns = to_scala_seq(self._jvm, columns)
        scala_assertion = ScalaFunction1(self._spark_session.sparkContext._gateway, assertion)
        self._Check = self._Check.haveAnyCompleteness(scala_columns, scala_assertion, scala_hint)
        return self
Ejemplo n.º 12
0
    def hasUniqueValueRatio(self, columns, assertion, hint=None):
        """
        Add a constraint on the unique value ratio of a single or combined set of key columns.

        :param list[str] columns: column(s) in the DataFrame to run the assertion on
        :param lambda assertion: a function that accepts an int or float parameter
        :param str hint: a hint that states why a constraint could have failed
        :return: self, with the underlying Check.scala object updated
        """
        # Bridge each Python argument to its Scala counterpart.
        scala_columns = to_scala_seq(self._jvm, columns)
        scala_hint = self._jvm.scala.Option.apply(hint)
        scala_assertion = ScalaFunction1(self._spark_session.sparkContext._gateway, assertion)
        self._Check = self._Check.hasUniqueValueRatio(scala_columns, scala_assertion, scala_hint)
        return self