コード例 #1
0
 def hasForeignKey(self, referenceTable, keyMap, *keyMaps):
     """
     Adds a foreign key constraint: the mapped columns of this table are
     checked for defining a foreign key to the given reference table.

     Args:
         referenceTable (pyspark.sql.dataframe.DataFrame): table the
             foreign key is pointing to
         keyMap (Tuple[str, str]): first column mapping from this table to
             the reference one, e.g. ("column1", "base_column1")
         *keyMaps (Tuple[str, str]): any further column mappings, written
             the same way as keyMap

     Returns:
         core.Check object including this constraint
     """
     jvm = self._jvm
     firstPair = jc.tuple2(jvm, keyMap)
     # Each additional mapping goes through jc.tuple2 exactly like the
     # first one; the collection is then passed on via
     # jc.iterable_to_scala_list.
     otherPairs = [jc.tuple2(jvm, pair) for pair in keyMaps]
     constrained = self.jvmCheck.hasForeignKey(
         referenceTable._jdf,
         firstPair,
         jc.iterable_to_scala_list(jvm, otherPairs)
     )
     return Check(
         self.dataFrame, self.name, self.cacheMethod, self.id, constrained
     )
コード例 #2
0
 def isJoinableWith(self, referenceTable, keyMap, *keyMaps):
     """
     Adds a joinability constraint: a join between this table and the
     given reference table must return at least some results. It is a
     weaker variant of the foreign key check, because partial matches are
     sufficient.

     Args:
         referenceTable (pyspark.sql.dataframe.DataFrame): table to join
             against
         keyMap (Tuple[str, str]): first column mapping from this table to
             the reference one, e.g. ("column1", "base_column1")
         *keyMaps (Tuple[str, str]): any further column mappings, written
             the same way as keyMap

     Returns:
         core.Check object including this constraint
     """
     jvm = self._jvm
     # The first mapping is passed on its own; the remaining ones are
     # wrapped with jc.tuple2 one by one and handed over through
     # jc.iterable_to_scala_list.
     leadingPair = jc.tuple2(jvm, keyMap)
     remainingPairs = [jc.tuple2(jvm, mapping) for mapping in keyMaps]
     joinable = self.jvmCheck.isJoinableWith(
         referenceTable._jdf,
         leadingPair,
         jc.iterable_to_scala_list(jvm, remainingPairs)
     )
     return Check(
         self.dataFrame, self.name, self.cacheMethod, self.id, joinable
     )
コード例 #3
0
 def hasFunctionalDependency(self, determinantSet, dependentSet):
     """
     Adds a functional dependency constraint: the columns of the dependent
     set must functionally depend on the columns of the determinant set.

     Args:
         determinantSet (List[str]): column names forming the determinant
             set
         dependentSet (List[str]): column names forming the dependent set

     Returns:
         core.Check object including this constraint
     """
     jvm = self._jvm
     # Both column collections go through jc.iterable_to_scala_list before
     # being handed to the wrapped JVM check.
     determinants = jc.iterable_to_scala_list(jvm, determinantSet)
     dependents = jc.iterable_to_scala_list(jvm, dependentSet)
     constrained = self.jvmCheck.hasFunctionalDependency(
         determinants, dependents
     )
     return Check(
         self.dataFrame, self.name, self.cacheMethod, self.id, constrained
     )
コード例 #4
0
 def hasUniqueKey(self, columnName, *columnNames):
     """
     Adds a unique key constraint: the given columns together must form a
     unique key for this table.

     Args:
         columnName (str): name of the first column that is supposed to be
             part of the unique key
         *columnNames (str): names of any other columns that are supposed
             to be part of the unique key

     Returns:
         core.Check object including this constraint
     """
     # The first column is passed as-is; the remaining names are handed
     # over through jc.iterable_to_scala_list.
     otherColumns = jc.iterable_to_scala_list(self._jvm, columnNames)
     constrained = self.jvmCheck.hasUniqueKey(columnName, otherColumns)
     return Check(
         self.dataFrame, self.name, self.cacheMethod, self.id, constrained
     )
コード例 #5
0
    def run(self, reporters=None):
        """
        Runs the check with all previously specified constraints and reports
        the result to every reporter passed as an argument.

        Args:
            reporters (List[reporters.Reporter]): iterable of reporters
                to produce output on the check result. If not specified
                (or empty), a reporters.ConsoleReporter backed by a
                ByteArrayOutputStream is used and the captured output is
                printed after the check has run

        Returns: None
        """
        baos = None
        if not reporters:
            # No reporter supplied: fall back to a console reporter whose
            # output is collected in a stream owned by this method.
            baos = ByteArrayOutputStream()
            reporters = [ConsoleReporter(baos)]

        jvm_reporters = jc.iterable_to_scala_list(
            self._jvm,
            [reporter.get_jvm_reporter(self._jvm) for reporter in reporters]
        )
        self.jvmCheck.run(jvm_reporters)

        # Compare against None explicitly so the captured output is printed
        # whenever the default reporter was installed, independent of how
        # the stream object evaluates in a boolean context.
        if baos is not None:
            # The parenthesised single-argument form prints the same text
            # under Python 2 and is valid syntax under Python 3.
            print(baos.get_output())