def __call__(self, head: RDD):
    """
    Count the elements of an RDD according to the configured mode.

    The mode is selected by two flags read from the instance:

    * ``distinct`` and not ``approximate``: the RDD is deduplicated with
      ``distinct()`` and then counted exactly.
    * ``distinct`` and ``approximate``: ``countApproxDistinct()`` is used
      on the RDD as given (no explicit deduplication beforehand).
    * otherwise: plain ``count()``; ``approximate`` has no effect unless
      ``distinct`` is set as well.

    When ``explained`` is truthy, the output of ``toDebugString()`` for the
    RDD that is about to be counted is written to the instance logger.

    :param head: The RDD whose elements are counted.
    :return: The result of ``count()`` or ``countApproxDistinct()``.
    """
    if self.distinct:
        if not self.approximate:
            # Exact distinct counting: deduplicate first so that the debug
            # output below reflects the RDD that is actually counted.
            head = head.distinct()

    if self.explained:
        debug_text = head.toDebugString().decode()
        self._log.info("toDebugString():\n%s", debug_text)

    if self.approximate and self.distinct:
        return head.countApproxDistinct()
    return head.count()
def __call__(self, rdd: RDD):
    """
    Convert the RDD to a DataFrame and write it as Parquet.

    The destination is read from ``self.save_loc``. When ``explained`` is
    truthy, the output of ``toDebugString()`` for the RDD is written to the
    instance logger before the write is started.

    :param rdd: The RDD to persist; it is converted with ``toDF()``.
    :return: None.
    """
    if self.explained:
        debug_text = rdd.toDebugString().decode()
        self._log.info("toDebugString():\n%s", debug_text)

    frame = rdd.toDF()
    writer = frame.write
    writer.parquet(self.save_loc)