def test_encoded_multibands(self):
    encoded = self.rdd.map(lambda s: AvroRegistry.tile_encoder(s))
    actual_encoded = encoded.collect()[0]
    expected_encoded = AvroRegistry.tile_encoder(self.multiband_dict)

    for actual, expected in zip(actual_encoded['bands'],
                                expected_encoded['bands']):
        self.assertEqual(actual, expected)
def test_encoded_tuples(self):
    # Encode each (tile, extent) pair with the encoding function held by
    # the RDD's tuple serializer (this assumes the serializer exposes it
    # as `.encoder`).
    s = self.rdd._jrdd_deserializer.serializer
    encoded = self.rdd.map(lambda x: s.encoder(x))
    actual_encoded = encoded.collect()

    expected_encoded = [
        {'_1': AvroRegistry.tile_encoder(self.arrs[0]), '_2': self.extents[0]},
        {'_1': AvroRegistry.tile_encoder(self.arrs[1]), '_2': self.extents[1]},
        {'_1': AvroRegistry.tile_encoder(self.arrs[2]), '_2': self.extents[2]}
    ]

    for actual, expected in zip(actual_encoded, expected_encoded):
        self.assertDictEqual(actual, expected)
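# For context: an RDD like self.rdd above would typically be produced by
# wrapping a Scala JavaRDD with a tuple serializer, e.g. via the
# GeoPyContext defined later in this file. A sketch only; `geopysc`,
# `schema`, `jrdd`, and the "Tile" value type are assumed placeholders:
#
#     serializer = geopysc.create_tuple_serializer(schema,
#                                                  key_type="ProjectedExtent",
#                                                  value_type="Tile")
#     rdd = geopysc.create_python_rdd(jrdd, serializer)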
def test_encoded_tiles(self):
    encoded = self.rdd.map(lambda s: AvroRegistry.tile_encoder(s))
    actual_encoded = encoded.collect()

    expected_encoded = [
        {'bands': [{'cols': 2, 'rows': 2, 'cells': bytearray([0, 0, 1, 1]), 'noDataValue': -128}]},
        {'bands': [{'cols': 2, 'rows': 2, 'cells': bytearray([1, 2, 3, 4]), 'noDataValue': -128}]},
        {'bands': [{'cols': 2, 'rows': 2, 'cells': bytearray([5, 6, 7, 8]), 'noDataValue': -128}]}
    ]

    for actual, expected in zip(actual_encoded, expected_encoded):
        self.assertEqual(actual, expected)
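# The tests above reference fixtures (self.rdd, self.arrs, self.extents,
# self.multiband_dict) defined in setUp methods that are not part of this
# excerpt; each test appears to belong to a suite with its own RDD (plain
# tiles, (tile, extent) tuples, multiband tiles). A minimal sketch of the
# plain-tile variant used by test_encoded_tiles, assuming tiles are dicts
# of a numpy array plus a no-data value; the layout and values here are
# assumptions for illustration, not the project's actual fixtures.
def setUp(self):
    import numpy as np
    from pyspark import SparkContext

    self.pysc = SparkContext(master="local[*]", appName="encoding-tests")

    # Single-band 2x2 byte tiles matching the cells expected in
    # test_encoded_tiles.
    self.arrs = [
        {'no_data_value': -128, 'data': np.array([[0, 0], [1, 1]], dtype='int8')},
        {'no_data_value': -128, 'data': np.array([[1, 2], [3, 4]], dtype='int8')},
        {'no_data_value': -128, 'data': np.array([[5, 6], [7, 8]], dtype='int8')},
    ]

    self.rdd = self.pysc.parallelize(self.arrs)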
from pyspark import RDD, SparkContext
from pyspark.serializers import AutoBatchedSerializer

# Assumed import paths for the project's Avro helpers; adjust to the
# actual package layout if it differs.
from geopyspark.avroregistry import AvroRegistry
from geopyspark.avroserializer import AvroSerializer


class GeoPyContext(object):
    """A wrapper of ``SparkContext``.

    This wrapper adds methods that help with sending/receiving
    information to/from Python.

    Args:
        pysc (pyspark.SparkContext, optional): An existing ``SparkContext``.
        **kwargs: ``GeoPyContext`` can create a ``SparkContext`` if given
            its constructing arguments.

    Note:
        If both ``pysc`` and ``kwargs`` are given, ``pysc`` will be used.

    Attributes:
        pysc (pyspark.SparkContext): The wrapped ``SparkContext``.
        sc (org.apache.spark.SparkContext): The Scala ``SparkContext``
            derived from the Python one.

    Raises:
        TypeError: If neither a ``SparkContext`` nor its constructing
            arguments are given.

    Examples:
        Creating ``GeoPyContext`` from an existing ``SparkContext``.

        >>> sc = SparkContext(appName="example", master="local[*]")
        >>> geopysc = GeoPyContext(sc)

        Creating ``GeoPyContext`` from the constructing arguments of
        ``SparkContext``.

        >>> geopysc = GeoPyContext(appName="example", master="local[*]")
    """

    def __init__(self, pysc=None, **kwargs):
        if pysc:
            self.pysc = pysc
        elif kwargs:
            self.pysc = SparkContext(**kwargs)
        else:
            raise TypeError("Either a SparkContext or its constructing"
                            " parameters must be given,"
                            " but none were found")

        self.sc = self.pysc._jsc.sc()
        self._jvm = self.pysc._gateway.jvm

        self.avroregistry = AvroRegistry()

    @staticmethod
    def map_key_input(key_type, is_boundable):
        """Gets the mapped GeoTrellis type from the ``key_type``.

        Args:
            key_type (str): The type of the ``K`` in the tuple, ``(K, V)``
                in the RDD.
            is_boundable (bool): Is ``K`` boundable.

        Returns:
            The corresponding GeoTrellis type.
        """

        if is_boundable:
            if key_type == "spatial":
                return "SpatialKey"
            elif key_type == "spacetime":
                return "SpaceTimeKey"
            else:
                raise ValueError("Could not find key type that matches " + key_type)
        else:
            if key_type == "spatial":
                return "ProjectedExtent"
            elif key_type == "spacetime":
                return "TemporalProjectedExtent"
            else:
                raise ValueError("Could not find key type that matches " + key_type)

    def create_schema(self, key_type):
        """Creates an AvroSchema.

        Args:
            key_type (str): The type of the ``K`` in the tuple, ``(K, V)``
                in the RDD.

        Returns:
            An AvroSchema for the types within the RDD.
        """

        return self._jvm.geopyspark.geotrellis.SchemaProducer.getSchema(key_type)

    def create_tuple_serializer(self, schema, key_type, value_type):
        decoder = self.avroregistry.create_partial_tuple_decoder(
            key_type=key_type, value_type=value_type)

        encoder = self.avroregistry.create_partial_tuple_encoder(
            key_type=key_type, value_type=value_type)

        return AutoBatchedSerializer(AvroSerializer(schema, decoder, encoder))

    def create_value_serializer(self, schema, value_type):
        decoder = self.avroregistry._get_decoder(value_type)
        encoder = self.avroregistry._get_encoder(value_type)

        return AvroSerializer(schema, decoder, encoder)

    def create_python_rdd(self, jrdd, serializer):
        """Creates a Python RDD from an RDD that came from Scala.

        Args:
            jrdd (org.apache.spark.api.java.JavaRDD): The RDD that came
                from Scala.
            serializer (:class:`~geopyspark.AvroSerializer` or
                pyspark.serializers.AutoBatchedSerializer(AvroSerializer)):
                An instance of ``AvroSerializer``, either on its own or
                wrapped by ``AutoBatchedSerializer``.

        Returns:
            ``pyspark.RDD``
        """

        if isinstance(serializer, AutoBatchedSerializer):
            return RDD(jrdd, self.pysc, serializer)
        else:
            return RDD(jrdd, self.pysc, AutoBatchedSerializer(serializer))
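# A minimal usage sketch for GeoPyContext, assuming a JVM with the
# geopyspark/GeoTrellis backend (geopyspark.geotrellis.SchemaProducer) on
# the classpath. The methods called below are defined in the class above;
# the value_type string "Tile", and which string form create_schema and
# create_tuple_serializer expect, are assumptions not confirmed by this
# excerpt.
if __name__ == "__main__":
    geopysc = GeoPyContext(appName="example", master="local[*]")

    # "spatial" keys that are not boundable map to ProjectedExtent.
    key_type = GeoPyContext.map_key_input("spatial", is_boundable=False)

    # Build the Avro schema for that key type, then a serializer for
    # (key, value) tuples; the result could wrap a JavaRDD coming from
    # Scala via create_python_rdd.
    schema = geopysc.create_schema(key_type)
    serializer = geopysc.create_tuple_serializer(schema, key_type=key_type,
                                                 value_type="Tile")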