def test_encoded_multibands(self):
    encoded = self.rdd.map(lambda s: AvroRegistry.tile_encoder(s))
    actual_encoded = encoded.collect()[0]
    expected_encoded = AvroRegistry.tile_encoder(self.multiband_dict)

    for actual, expected in zip(actual_encoded['bands'],
                                expected_encoded['bands']):
        self.assertEqual(actual, expected)
def test_encoded_tuples(self):
    # Encode each (tile, extent) pair with the encoding function held by
    # the RDD's tuple serializer (this assumes the serializer exposes it
    # as `.encoder`).
    s = self.rdd._jrdd_deserializer.serializer
    encoded = self.rdd.map(lambda x: s.encoder(x))
    actual_encoded = encoded.collect()

    expected_encoded = [
        {'_1': AvroRegistry.tile_encoder(self.arrs[0]), '_2': self.extents[0]},
        {'_1': AvroRegistry.tile_encoder(self.arrs[1]), '_2': self.extents[1]},
        {'_1': AvroRegistry.tile_encoder(self.arrs[2]), '_2': self.extents[2]}
    ]

    for actual, expected in zip(actual_encoded, expected_encoded):
        self.assertDictEqual(actual, expected)
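# For context: an RDD like self.rdd above would typically be produced by
# wrapping a Scala JavaRDD with a tuple serializer, e.g. via the
# GeoPyContext defined later in this file. A sketch only; `geopysc`,
# `schema`, `jrdd`, and the "Tile" value type are assumed placeholders:
#
#     serializer = geopysc.create_tuple_serializer(schema,
#                                                  key_type="ProjectedExtent",
#                                                  value_type="Tile")
#     rdd = geopysc.create_python_rdd(jrdd, serializer)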
def test_encoded_tiles(self):
    encoded = self.rdd.map(lambda s: AvroRegistry.tile_encoder(s))
    actual_encoded = encoded.collect()

    expected_encoded = [
        {'bands': [{'cols': 2, 'rows': 2, 'cells': bytearray([0, 0, 1, 1]), 'noDataValue': -128}]},
        {'bands': [{'cols': 2, 'rows': 2, 'cells': bytearray([1, 2, 3, 4]), 'noDataValue': -128}]},
        {'bands': [{'cols': 2, 'rows': 2, 'cells': bytearray([5, 6, 7, 8]), 'noDataValue': -128}]}
    ]

    for actual, expected in zip(actual_encoded, expected_encoded):
        self.assertEqual(actual, expected)
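# The tests above reference fixtures (self.rdd, self.arrs, self.extents,
# self.multiband_dict) defined in setUp methods that are not part of this
# excerpt; each test appears to belong to a suite with its own RDD (plain
# tiles, (tile, extent) tuples, multiband tiles). A minimal sketch of the
# plain-tile variant used by test_encoded_tiles, assuming tiles are dicts
# of a numpy array plus a no-data value; the layout and values here are
# assumptions for illustration, not the project's actual fixtures.
def setUp(self):
    import numpy as np
    from pyspark import SparkContext

    self.pysc = SparkContext(master="local[*]", appName="encoding-tests")

    # Single-band 2x2 byte tiles matching the cells expected in
    # test_encoded_tiles.
    self.arrs = [
        {'no_data_value': -128, 'data': np.array([[0, 0], [1, 1]], dtype='int8')},
        {'no_data_value': -128, 'data': np.array([[1, 2], [3, 4]], dtype='int8')},
        {'no_data_value': -128, 'data': np.array([[5, 6], [7, 8]], dtype='int8')},
    ]

    self.rdd = self.pysc.parallelize(self.arrs)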
from pyspark import RDD, SparkContext
from pyspark.serializers import AutoBatchedSerializer

# Assumed import paths for the project's Avro helpers; adjust to the
# actual package layout if it differs.
from geopyspark.avroregistry import AvroRegistry
from geopyspark.avroserializer import AvroSerializer


class GeoPyContext(object):
    """A wrapper of ``SparkContext``.

    This wrapper adds methods that help with sending/receiving
    information to/from Python.

    Args:
        pysc (pyspark.SparkContext, optional): An existing ``SparkContext``.
        **kwargs: ``GeoPyContext`` can create a ``SparkContext`` if given
            its constructing arguments.

    Note:
        If both ``pysc`` and ``kwargs`` are given, ``pysc`` will be used.

    Attributes:
        pysc (pyspark.SparkContext): The wrapped ``SparkContext``.
        sc (org.apache.spark.SparkContext): The Scala ``SparkContext``
            derived from the Python one.

    Raises:
        TypeError: If neither a ``SparkContext`` nor its constructing
            arguments are given.

    Examples:
        Creating ``GeoPyContext`` from an existing ``SparkContext``.

        >>> sc = SparkContext(appName="example", master="local[*]")
        >>> geopysc = GeoPyContext(sc)

        Creating ``GeoPyContext`` from the constructing arguments of
        ``SparkContext``.

        >>> geopysc = GeoPyContext(appName="example", master="local[*]")
    """

    def __init__(self, pysc=None, **kwargs):
        if pysc:
            self.pysc = pysc
        elif kwargs:
            self.pysc = SparkContext(**kwargs)
        else:
            raise TypeError("Either a SparkContext or its constructing"
                            " parameters must be given,"
                            " but none were found")

        self.sc = self.pysc._jsc.sc()
        self._jvm = self.pysc._gateway.jvm

        self.avroregistry = AvroRegistry()

    @staticmethod
    def map_key_input(key_type, is_boundable):
        """Gets the mapped GeoTrellis type from the ``key_type``.

        Args:
            key_type (str): The type of the ``K`` in the tuple, ``(K, V)``
                in the RDD.
            is_boundable (bool): Is ``K`` boundable.

        Returns:
            The corresponding GeoTrellis type.
        """

        if is_boundable:
            if key_type == "spatial":
                return "SpatialKey"
            elif key_type == "spacetime":
                return "SpaceTimeKey"
            else:
                raise ValueError("Could not find key type that matches " + key_type)
        else:
            if key_type == "spatial":
                return "ProjectedExtent"
            elif key_type == "spacetime":
                return "TemporalProjectedExtent"
            else:
                raise ValueError("Could not find key type that matches " + key_type)

    def create_schema(self, key_type):
        """Creates an AvroSchema.

        Args:
            key_type (str): The type of the ``K`` in the tuple, ``(K, V)``
                in the RDD.

        Returns:
            An AvroSchema for the types within the RDD.
        """

        return self._jvm.geopyspark.geotrellis.SchemaProducer.getSchema(key_type)

    def create_tuple_serializer(self, schema, key_type, value_type):
        decoder = self.avroregistry.create_partial_tuple_decoder(
            key_type=key_type, value_type=value_type)

        encoder = self.avroregistry.create_partial_tuple_encoder(
            key_type=key_type, value_type=value_type)

        return AutoBatchedSerializer(AvroSerializer(schema, decoder, encoder))

    def create_value_serializer(self, schema, value_type):
        decoder = self.avroregistry._get_decoder(value_type)
        encoder = self.avroregistry._get_encoder(value_type)

        return AvroSerializer(schema, decoder, encoder)

    def create_python_rdd(self, jrdd, serializer):
        """Creates a Python RDD from an RDD that came from Scala.

        Args:
            jrdd (org.apache.spark.api.java.JavaRDD): The RDD that came
                from Scala.
            serializer (:class:`~geopyspark.AvroSerializer` or
                pyspark.serializers.AutoBatchedSerializer(AvroSerializer)):
                An instance of ``AvroSerializer``, either on its own or
                wrapped by ``AutoBatchedSerializer``.

        Returns:
            ``pyspark.RDD``
        """

        if isinstance(serializer, AutoBatchedSerializer):
            return RDD(jrdd, self.pysc, serializer)
        else:
            return RDD(jrdd, self.pysc, AutoBatchedSerializer(serializer))
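# A minimal usage sketch for GeoPyContext, assuming a JVM with the
# geopyspark/GeoTrellis backend (geopyspark.geotrellis.SchemaProducer) on
# the classpath. The methods called below are defined in the class above;
# the value_type string "Tile", and which string form create_schema and
# create_tuple_serializer expect, are assumptions not confirmed by this
# excerpt.
if __name__ == "__main__":
    geopysc = GeoPyContext(appName="example", master="local[*]")

    # "spatial" keys that are not boundable map to ProjectedExtent.
    key_type = GeoPyContext.map_key_input("spatial", is_boundable=False)

    # Build the Avro schema for that key type, then a serializer for
    # (key, value) tuples; the result could wrap a JavaRDD coming from
    # Scala via create_python_rdd.
    schema = geopysc.create_schema(key_type)
    serializer = geopysc.create_tuple_serializer(schema, key_type=key_type,
                                                 value_type="Tile")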