def hasUniqueness(self, columns, assertion):
    """
    Creates a constraint that asserts on uniqueness in a single or combined
    set of key columns.

    @param columns    Key column(s). A list is used as given; any other value
                      is treated as one column and wrapped in a one-element list.
    @param assertion  Function that receives a double input parameter and
                      returns a boolean. Refers to the fraction of unique values
    @return           A new Check created from this check's spark, level and
                      description together with the resulting JVM constraint.
    """
    # A non-list argument means a single column was provided.
    key_columns = columns if isinstance(columns, list) else [columns]

    # Wrap the Python callable so it can be handed to the JVM side.
    gateway = self.spark.sparkContext._gateway
    scala_assertion = jc.scala_function1(gateway, assertion)

    add_constraint = self.jvmCheck.hasUniqueness
    scala_columns = jc.iterable_to_scala_seq(self._jvm, key_columns)
    jvmConstraint = add_constraint(scala_columns, scala_assertion)

    return Check(self.spark, self.level, self.description, jvmConstraint)
def jvmAnalyzer(self):
    """
    Builds the JVM UniqueValueRatio analyzer for self.columns.

    @return The object returned by calling jvmdeequAnalyzers.UniqueValueRatio
            with the columns as a Scala Seq and the value produced by its
            "apply$default$2" accessor.
    """
    analyzer = self.jvmdeequAnalyzers.UniqueValueRatio
    scala_columns = jc.iterable_to_scala_seq(self._jvm, self.columns)
    # "apply$default$2" is presumably the Scala-generated accessor for the
    # default value of the second apply() parameter -- confirm against Deequ.
    second_arg = getattr(analyzer, "apply$default$2")()
    return analyzer(scala_columns, second_arg)
def jvmAnalyzer(self):
    """
    Builds the JVM MutualInformation analyzer for self.columns.

    @return The object returned by calling jvmdeequAnalyzers.MutualInformation
            with the columns as a Scala Seq and the value produced by its
            "apply$default$2" accessor.
    """
    mutual_information = self.jvmdeequAnalyzers.MutualInformation
    arguments = [
        jc.iterable_to_scala_seq(self._jvm, self.columns),
        # Presumably the Scala-side default for the second apply() parameter
        # -- confirm against the Deequ analyzer signature.
        getattr(mutual_information, "apply$default$2")(),
    ]
    return mutual_information(*arguments)
def jvmAnalyzer(self):
    """
    Builds the JVM Distinctness analyzer for self.columns.

    @return The object returned by calling jvmdeequAnalyzers.Distinctness
            with the columns as a Scala Seq and the value produced by
            Distinctness's own "apply$default$2" accessor.
    """
    distinctness = self.jvmdeequAnalyzers.Distinctness
    scala_columns = jc.iterable_to_scala_seq(self._jvm, self.columns)
    # Take the default second argument from Distinctness itself. The previous
    # code fetched it from the unrelated DataType analyzer, which only works
    # while both analyzers happen to declare the same default.
    # "apply$default$2" is presumably the Scala-generated accessor for the
    # default of the second apply() parameter -- confirm against Deequ.
    second_arg = getattr(distinctness, "apply$default$2")()
    return distinctness(scala_columns, second_arg)
def jvmAnalyzer(self):
    """
    Builds the JVM CountDistinct analyzer for self.column.

    @return The object returned by calling jvmdeequAnalyzers.CountDistinct
            with self.column converted to a Scala Seq.
    """
    count_distinct = self.jvmdeequAnalyzers.CountDistinct
    # NOTE(review): this reads self.column (singular) while the other
    # multi-column analyzers read self.columns -- confirm which attribute the
    # class constructor sets and that it holds an iterable of column names.
    scala_columns = jc.iterable_to_scala_seq(self._jvm, self.column)
    return count_distinct(scala_columns)