def _analyzer_jvm(self): """Returns the value of the computed distinctness :return self """ return self._deequAnalyzers.CountDistinct( to_scala_seq(self._jvm, self.columns))
def _analyzer_jvm(self): """Returns the value of the computed aggregation :return self """ return self._deequAnalyzers.ApproxQuantiles( self.column, to_scala_seq(self._jvm, self.quantiles), self.relativeError )
def _analyzer_jvm(self): """ Returns the unique value ratio in columns. :return self """ return self._deequAnalyzers.UniqueValueRatio( to_scala_seq(self._jvm, self.columns), self._jvm.scala.Option.apply(self.where) )
def _analyzer_jvm(self): """ Returns the mutual information of columns. :return self """ return self._deequAnalyzers.MutualInformation( to_scala_seq(self._jvm, self.columns), self._jvm.scala.Option.apply(self.where) )
def _analyzer_jvm(self): """ Returns the distinctness of the column(s) :return self: access the value of the distincness analyzer. """ return self._deequAnalyzers.Distinctness( to_scala_seq(self._jvm, self.columns), self._jvm.scala.Option.apply(self.where) )
def restrictToColumns(self, restrict_to_columns: list):
    """
    Restricts the profiling run to a subset of the columns.

    :param list restrict_to_columns: The columns to profile
    :return: self, restricted to the specified columns
    """
    self._ColumnProfilerRunBuilder.restrictToColumns(
        to_scala_seq(self._jvm, restrict_to_columns)
    )
    return self
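# A minimal usage sketch for restrictToColumns, assuming the same `spark` and
# `df` as above; ColumnProfilerRunner is PyDeequ's profiling entry point, and
# chaining restrictToColumns on the builder follows the method defined above:
from pydeequ.profiles import ColumnProfilerRunner

profiles = (ColumnProfilerRunner(spark)
            .onData(df)
            .restrictToColumns(["customer_id", "price"])
            .run())
for col, profile in profiles.profiles.items():
    print(col, profile)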
def areAnyComplete(self, columns, hint=None):
    """
    Creates a constraint that asserts completion in any of the combined set of columns.

    :param list[str] columns: Columns in the DataFrame to run the assertion on.
    :param str hint: A hint that states why a constraint could have failed.
    :return: self (Check object) with the areAnyComplete constraint added.
    """
    hint = self._jvm.scala.Option.apply(hint)
    columns_seq = to_scala_seq(self._jvm, columns)
    self._Check = self._Check.areAnyComplete(columns_seq, hint)
    return self
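# A minimal usage sketch for areAnyComplete, assuming `spark` and a `df` with
# illustrative `email` and `phone` columns; Check, CheckLevel,
# VerificationSuite, and VerificationResult are PyDeequ's standard
# verification entry points:
from pydeequ.checks import Check, CheckLevel
from pydeequ.verification import VerificationResult, VerificationSuite

check = Check(spark, CheckLevel.Error, "contactability check")
result = (VerificationSuite(spark)
          .onData(df)
          .addCheck(check.areAnyComplete(
              ["email", "phone"],
              hint="every row needs at least one contact field"))
          .run())
VerificationResult.checkResultsAsDataFrame(spark, result).show()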
def getSuccessMetricsAsJson(self, withTags: list = None):
    """
    Gets the AnalysisResult as JSON.

    :param withTags: List of tags to filter previous Metrics Repository runs with
    :return: the loaded metrics, parsed from JSON
    """
    self._check_RepositoryLoader()
    if not withTags:
        # Fall back to the Scala default argument: an empty tag sequence
        withTags = getattr(self.repository.load(), "getSuccessMetricsAsJson$default$1")()
    else:
        withTags = to_scala_seq(self._jvm, withTags)
    return json.loads(self.RepositoryLoader.getSuccessMetricsAsJson(withTags))
def getSuccessMetricsAsDataFrame(self, withTags: list = None, pandas: bool = False):
    """
    Gets the AnalysisResult as a DataFrame.

    :param withTags: List of tags to filter previous Metrics Repository runs with
    :param pandas: If True, return a pandas DataFrame instead of a Spark DataFrame
    :return: the loaded metrics as a Spark or pandas DataFrame
    """
    self._check_RepositoryLoader()
    if not withTags:
        # Fall back to the Scala default argument: an empty tag sequence
        withTags = getattr(self.repository.load(), "getSuccessMetricsAsDataFrame$default$2")()
    else:
        withTags = to_scala_seq(self._jvm, withTags)
    success = self.RepositoryLoader.getSuccessMetricsAsDataFrame(self._jspark_session, withTags)
    metrics_df = DataFrame(success, self._spark_session)
    return metrics_df.toPandas() if pandas else metrics_df
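# A minimal usage sketch for the two getters above, assuming metrics were
# previously persisted to a FileSystemMetricsRepository at a path
# `metrics_file` (repository and loader wiring follow PyDeequ's repository
# module):
from pydeequ.repository import FileSystemMetricsRepository

repository = FileSystemMetricsRepository(spark, metrics_file)
# All stored runs, as a Spark DataFrame:
repository.load().getSuccessMetricsAsDataFrame().show()
# Only runs carrying the "nightly" tag, as parsed JSON:
print(repository.load().getSuccessMetricsAsJson(withTags=["nightly"]))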
def forAnalyzers(self, analyzers: list):
    """
    Restricts loading to the metrics produced by the given analyzers.

    :param analyzers: List of analyzers whose resulting metrics you want to load
    :return: self, restricted to the given analyzers
    """
    analyzers_jvm = []
    for analyzer in analyzers:
        # Bind each Python analyzer to this gateway's JVM, then collect its
        # JVM counterpart (_analyzer_jvm is a property, so no call is needed)
        analyzer._set_jvm(self._jvm)
        analyzers_jvm.append(analyzer._analyzer_jvm)
    self.RepositoryLoader.forAnalyzers(to_scala_seq(self._jvm, analyzers_jvm))
    return self
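# A minimal usage sketch for forAnalyzers, reusing the `repository` from the
# previous example; since forAnalyzers returns self, it can be chained with
# the metric getters to load only the listed analyzers' metrics:
from pydeequ.analyzers import CountDistinct, Distinctness

loaded = (repository.load()
          .forAnalyzers([CountDistinct(["customer_id"]),
                         Distinctness(["customer_id", "order_id"])])
          .getSuccessMetricsAsDataFrame())
loaded.show()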
def haveAnyCompleteness(self, columns, assertion, hint=None):
    """
    Creates a constraint that asserts on completion in any of the combined set of columns.

    :param list[str] columns: Columns in the DataFrame to run the assertion on.
    :param lambda assertion: A function that accepts an int or float parameter.
    :param str hint: A hint that states why a constraint could have failed.
    :return: self (Check object) with the haveAnyCompleteness constraint added.
    """
    columns_seq = to_scala_seq(self._jvm, columns)
    assertion_func = ScalaFunction1(self._spark_session.sparkContext._gateway, assertion)
    hint = self._jvm.scala.Option.apply(hint)
    self._Check = self._Check.haveAnyCompleteness(columns_seq, assertion_func, hint)
    return self
def hasUniqueValueRatio(self, columns, assertion, hint=None):
    """
    Creates a constraint on the unique value ratio in a single or combined set of key columns.

    :param list[str] columns: Column(s) in the DataFrame to run the assertion on.
    :param lambda assertion: A function that accepts an int or float parameter.
    :param str hint: A hint that states why a constraint could have failed.
    :return: self (Check object) with the hasUniqueValueRatio constraint added.
    """
    assertion_func = ScalaFunction1(self._spark_session.sparkContext._gateway, assertion)
    hint = self._jvm.scala.Option.apply(hint)
    columns_seq = to_scala_seq(self._jvm, columns)
    self._Check = self._Check.hasUniqueValueRatio(columns_seq, assertion_func, hint)
    return self
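# A minimal sketch combining the two constraints above, reusing the earlier
# `spark`/`df` setup; each assertion lambda receives the computed metric as a
# float, and the column names are illustrative:
check = (Check(spark, CheckLevel.Warning, "integrity checks")
         .haveAnyCompleteness(["email", "phone"], lambda ratio: ratio >= 0.95,
                              hint="at least 95% of rows need a contact field")
         .hasUniqueValueRatio(["customer_id"], lambda ratio: ratio == 1.0,
                              hint="customer_id values must be unique"))
result = VerificationSuite(spark).onData(df).addCheck(check).run()
VerificationResult.checkResultsAsDataFrame(spark, result).show()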