def test_hash_serializer(self): hash(NoOpSerializer()) hash(UTF8Deserializer()) hash(CPickleSerializer()) hash(MarshalSerializer()) hash(AutoSerializer()) hash(BatchedSerializer(CPickleSerializer())) hash(AutoBatchedSerializer(MarshalSerializer())) hash(PairDeserializer(NoOpSerializer(), UTF8Deserializer())) hash(CartesianDeserializer(NoOpSerializer(), UTF8Deserializer())) hash(CompressedSerializer(CPickleSerializer())) hash(FlattenedValuesSerializer(CPickleSerializer()))
def cartesian(self, other): """ Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements C{(a, b)} where C{a} is in C{self} and C{b} is in C{other}. >>> rdd = sc.parallelize([1, 2]) >>> sorted(rdd.cartesian(rdd).collect()) [(1, 1), (1, 2), (2, 1), (2, 2)] """ # Due to batching, we can't use the Java cartesian method. deserializer = CartesianDeserializer(self._jrdd_deserializer, other._jrdd_deserializer) return RDD(self._jrdd.cartesian(other._jrdd), self.ctx, deserializer)