def hasForeignKey(self, referenceTable, keyMap, *keyMaps):
    """
    Verifies that the given column mappings form a foreign key from this
    table into the specified reference table.

    Args:
        referenceTable (pyspark.sql.dataframe.DataFrame): Table to which
            the foreign key is pointing
        keyMap (Tuple[str, str]): First column mapping from this table to
            the reference one ("column1", "base_column1")
        keyMaps (Tuple[str, str]): Further column mappings from this table
            to the reference one ("column1", "base_column1")

    Returns:
        core.Check object including this constraint
    """
    # Convert the optional extra mappings to JVM tuples before bridging.
    extraMappings = [jc.tuple2(self._jvm, mapping) for mapping in keyMaps]
    newJvmCheck = self.jvmCheck.hasForeignKey(
        referenceTable._jdf,
        jc.tuple2(self._jvm, keyMap),
        jc.iterable_to_scala_list(self._jvm, extraMappings)
    )
    return Check(self.dataFrame, self.name, self.cacheMethod,
                 self.id, newJvmCheck)
def isJoinableWith(self, referenceTable, keyMap, *keyMaps):
    """
    Verifies that joining this table to the given reference table on the
    specified column mappings produces at least one row. This is a weaker
    variant of the foreign key check: only partial matches are required.

    Args:
        referenceTable (pyspark.sql.dataframe.DataFrame): Table to which
            the foreign key is pointing
        keyMap (Tuple[str, str]): First column mapping from this table to
            the reference one ("column1", "base_column1")
        keyMaps (Tuple[str, str]): Further column mappings from this table
            to the reference one ("column1", "base_column1")

    Returns:
        core.Check object including this constraint
    """
    # Convert the optional extra mappings to JVM tuples before bridging.
    extraMappings = [jc.tuple2(self._jvm, mapping) for mapping in keyMaps]
    newJvmCheck = self.jvmCheck.isJoinableWith(
        referenceTable._jdf,
        jc.tuple2(self._jvm, keyMap),
        jc.iterable_to_scala_list(self._jvm, extraMappings)
    )
    return Check(self.dataFrame, self.name, self.cacheMethod,
                 self.id, newJvmCheck)
def hasFunctionalDependency(self, determinantSet, dependentSet):
    """
    Verifies that the columns of the dependent set are functionally
    dependent on the columns of the determinant set.

    Args:
        determinantSet (List[str]): column names which form a determinant set
        dependentSet (List[str]): sequence of column names which form a
            dependent set

    Returns:
        core.Check object including this constraint
    """
    # Both column collections cross the py4j bridge as Scala lists.
    scalaDeterminant = jc.iterable_to_scala_list(self._jvm, determinantSet)
    scalaDependent = jc.iterable_to_scala_list(self._jvm, dependentSet)
    newJvmCheck = self.jvmCheck.hasFunctionalDependency(scalaDeterminant,
                                                        scalaDependent)
    return Check(self.dataFrame, self.name, self.cacheMethod,
                 self.id, newJvmCheck)
def hasUniqueKey(self, columnName, *columnNames):
    """
    Verifies that the given columns together form a unique key for this
    table.

    Args:
        columnName (str): name of the first column that is supposed to be
            part of the unique key
        columnNames (List[str]): names of the other columns that are
            supposed to be part of the unique key

    Returns:
        core.Check object including this constraint
    """
    # The first column is passed plainly; the rest go over as a Scala list.
    remainingColumns = jc.iterable_to_scala_list(self._jvm, columnNames)
    newJvmCheck = self.jvmCheck.hasUniqueKey(columnName, remainingColumns)
    return Check(self.dataFrame, self.name, self.cacheMethod,
                 self.id, newJvmCheck)
def run(self, reporters=None): """ Runs check with all the previously specified constraints and report to every reporter passed as an argument Args: reporters (List[reporters.Reporter]): iterable of reporters to produce output on the check result. If not specified, reporters.ConsoleReporter is used Returns: None """ baos = None if not reporters: baos = ByteArrayOutputStream() reporters = [ConsoleReporter(baos)] jvm_reporters = jc.iterable_to_scala_list( self._jvm, [reporter.get_jvm_reporter(self._jvm) for reporter in reporters] ) self.jvmCheck.run(jvm_reporters) if baos: print baos.get_output()