def _set_avro_writers(self):
    """Create the serializers used for writing with the writer schema.

    Sets ``_json_serializer`` (built from ``_writer_schema``) and
    ``_quickavro_encoder`` (configured with ``_writer_schema_json``).
    When ``_output_file`` is truthy, ``file_writer`` is also created for
    that file and given the same JSON schema.
    """
    self._json_serializer = TcJsonSerializer(self._writer_schema)

    # Bind the attribute first, then configure it through the local alias.
    binary_encoder = self._quickavro_encoder = quickavro.BinaryEncoder()
    binary_encoder.schema = self._writer_schema_json

    if not self._output_file:
        return

    container_writer = self.file_writer = quickavro.FileWriter(self._output_file)
    container_writer.schema = self._writer_schema_json
Example #2
0
 def test_type_union_fixed(self):
     """Encode a record whose only field is a ``["null", fixed]`` union.

     Covers an explicit ``None``, a record with the field missing, and a
     fixed-size bytes payload.
     """
     with quickavro.BinaryEncoder() as encoder:
         payload = b"\x01\x02\x03\x04\x05\x06\x07\x08"
         fixed_branch = {
             "name": "testfixed",
             "type": "fixed",
             "size": len(payload),
         }
         encoder.schema = {
             "type": "record",
             "name": "TestRecord",
             "fields": [
                 {"name": "testfield", "type": ["null", fixed_branch]},
             ],
         }
         # (datum, expected encoding) pairs, written in this order.
         cases = (
             ({"testfield": None}, b"\x00"),
             ({}, b"\x00"),
             ({"testfield": payload}, b"\x02\x01\x02\x03\x04\x05\x06\x07\x08"),
         )
         for datum, expected in cases:
             assert encoder.write(datum) == expected
Example #3
0
    def test_record(self, tmpdir):
        """Write records as header + blocks in memory, then read them back.

        The records exercise the optional ``age`` and ``gender`` union
        fields: present, explicitly ``None``, and missing.

        Args:
            tmpdir: pytest fixture; currently unused (the data never touches
                disk) and kept only so the test's signature is unchanged.

        NOTE(review): this test only prints the decoded records and makes no
        assertions, so it can only fail if quickavro raises.
        """
        Gender = quickavro.Enum("Gender", "F M")

        records = [
            {"name": "Larry", "age": 21},
            {"name": "Gary", "age": 34},
            {"name": "Barry", "age": 27},
            {"name": "Dark Larry", "age": 1134},
            {"name": "Larry", "age": None},
            {"name": "Larry"},
            {"name": "Larry", "gender": Gender.M},
            {"name": "Sherry", "gender": Gender.F},
        ]

        with quickavro.BinaryEncoder() as encoder:
            encoder.schema = {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "name", "type": "string"},
                    {"name": "age",  "type": ["int", "null"], "default": 1},
                    {"name": "gender", "type": [Gender.T, "null"]},
                ]
            }
            # Build the container in memory: the header followed by each
            # encoded block.  (Previously this variable was first assigned a
            # file path that was never used and immediately overwritten.)
            avro_data = encoder.header
            for block in encoder.write_blocks(records):
                avro_data += block

            # Only the payload after the header is needed for reading back.
            _header, data = encoder.read_header(avro_data)

            for record in encoder.read_blocks(data):
                print(record)
Example #4
0
    def test_type_boolean(self):
        """Encode ``True`` and ``False`` with a boolean schema."""
        expectations = ((True, b"\x01"), (False, b"\x00"))
        with quickavro.BinaryEncoder() as encoder:
            encoder.schema = {"type": "boolean"}
            for flag, expected in expectations:
                assert encoder.write(flag) == expected
Example #5
0
 def test_type_fixed(self):
     """Encode an 8-byte value with a top-level fixed schema of that size."""
     with quickavro.BinaryEncoder() as encoder:
         payload = b"\x01\x02\x03\x04\x05\x06\x07\x08"
         fixed_schema = {"name": "test", "type": "fixed", "size": len(payload)}
         encoder.schema = fixed_schema
         encoded = encoder.write(payload)
         assert encoded == b"\x01\x02\x03\x04\x05\x06\x07\x08"
Example #6
0
 def test_invalid_union(self):
     """Expect ``quickavro.SchemaError`` for a union naming an unknown type.

     The union ``["invalid", "null"]`` references a type name that is not
     defined anywhere in the schema.  The ``pytest.raises`` block contains
     only the schema assignment: statements placed after the raising line
     inside the block never execute, so the former ``write``/``assert``
     pair (copied from the bytes test) was dead code and has been removed.
     """
     with quickavro.BinaryEncoder() as encoder:
         with pytest.raises(quickavro.SchemaError):
             encoder.schema = {
                 "type": "record",
                 "name": "test",
                 "fields": [{
                     "name": "age",
                     "type": ["invalid", "null"]
                 }]
             }
Example #7
0
    def test_type_enum(self):
        """Encode enum values given as quickavro Enum members and as strings."""
        Gender = quickavro.Enum("Gender", "F M")

        # Case 1: schema and values both come from quickavro's own Enum type.
        with quickavro.BinaryEncoder() as encoder:
            encoder.schema = Gender.T
            # NOTE: writing {"test": Gender.M} against this schema was
            # recorded as causing a segfault, so it is not exercised here.
            assert encoder.write(Gender.F) == b"\x00"
            assert encoder.write(Gender.M) == b"\x02"

        # Case 2: plain dict schema, values passed as symbol strings.
        symbol_schema = {
            "type": "enum",
            "name": "Gender",
            "symbols": ["F", "M"],
        }
        with quickavro.BinaryEncoder() as encoder:
            encoder.schema = symbol_schema
            for symbol, expected in (("F", b"\x00"), ("M", b"\x02")):
                assert encoder.write(symbol) == expected
Example #8
0
    def prepare_input(self):
        """Build the input decoder for ``self._encoding`` and store it.

        A per-item decoder is chosen from the codec:

        * ``_Codec.NULL`` -- items are passed through unchanged.
        * ``_Codec.UTF8`` -- items are decoded with ``.decode('utf-8')``.
        * ``_Codec.JSON`` -- items are parsed with ``json.loads``.
        * ``_Codec.AVRO`` -- items are read with a quickavro
          ``BinaryEncoder`` configured with ``self._schema`` (the first
          element of ``read()``'s result is used).  If ``self._schema`` is
          ``None`` the decoder is still installed, but raises as soon as it
          is asked to decode an item.
        * ``_Codec.SOAP`` -- items are handed to ``soap_decode``.

        The result, stored in ``self._decode``, is a generator function
        taking an iterable of raw items:

        * when ``self._recordsets`` is falsy it yields each decoded item;
        * otherwise it decodes every item up front and yields exactly one
          container: ``np.matrix`` if the first item is a ``list``,
          ``pd.Series`` if it is neither ``list`` nor ``dict``, and
          ``pd.DataFrame`` for ``dict`` items or an empty input.

        Raises:
            Exception: if ``self._encoding`` is none of the codecs above.
        """
        # NOTE(review): this clears ``_encode`` although only ``_decode`` is
        # assigned below -- confirm that is intended.
        self._encode = None
        if self._encoding == _Codec.NULL:
            basic_decode = lambda x: x
        elif self._encoding == _Codec.UTF8:
            basic_decode = lambda x: x.decode('utf-8')
        elif self._encoding == _Codec.JSON:
            basic_decode = json.loads
        elif self._encoding == _Codec.AVRO:
            if self._schema is None:

                # Defer the failure until data actually has to be decoded.
                def schema_not_set(x):
                    raise Exception("Avro schema not set")

                basic_decode = schema_not_set
            else:
                # quickavro's BinaryEncoder is also the object used to read.
                avro_decoder = quickavro.BinaryEncoder()
                avro_decoder.schema = self._schema
                basic_decode = lambda x: avro_decoder.read(x)[0]
        elif self._encoding == _Codec.SOAP:
            basic_decode = soap_decode
        else:
            raise Exception("BERT Serialization not supported")

        if not self._recordsets:

            def decode(g):
                for x in g:
                    yield basic_decode(x)
        else:

            def decode(g):
                rr = [basic_decode(x) for x in g]
                if not rr:
                    # No records: yield an empty frame.
                    yield pd.DataFrame(rr)
                elif type(rr[0]) is list:
                    ## array records
                    yield np.matrix(rr)
                elif type(rr[0]) is not dict:
                    ## scalar records
                    yield pd.Series(rr)
                else:
                    ## proper records
                    yield pd.DataFrame(rr)

        self._decode = decode
Example #9
0
 def test_type_record(self):
     """Encode a record with a string ``name`` and an int ``age`` field."""
     person_schema = {
         "type": "record",
         "name": "test",
         "fields": [
             {"name": "name", "type": "string"},
             {"name": "age", "type": "int"},
         ],
     }
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = person_schema
         encoded = encoder.write({"name": "Larry", "age": 21})
         assert encoded == b"\nLarry*"
Example #10
0
    def test_type_link(self):
        """Encode a record whose ``nextlink`` field refers to its own type."""
        with quickavro.BinaryEncoder() as encoder:
            link_fields = [
                {"name": "linkid", "type": "int"},
                # The union's second branch names the enclosing record.
                {"name": "nextlink", "type": ["null", "chainlink"]},
            ]
            encoder.schema = {
                "type": "record",
                "name": "chainlink",
                "fields": link_fields,
            }

            # Two links; the inner one leaves ``nextlink`` unset.
            first_link = {"linkid": 1, "nextlink": {"linkid": 2}}
            assert encoder.write(first_link) == b"\x02\x02\x04\x00"
Example #11
0
 def test_type_union(self):
     """Encode one union field with int, null, array and enum values.

     The enum branch is exercised both with a symbol string and with a
     member of a matching ``quickavro.Enum``.
     """
     TextAge = quickavro.Enum("TextAge", "AGE_ONE AGE_TWO")
     with quickavro.BinaryEncoder() as encoder:
         union_branches = [
             "int",
             "null",
             {"type": "array", "items": "int"},
             {
                 "type": "enum",
                 "name": "TextAge",
                 "symbols": ["AGE_ONE", "AGE_TWO"],
             },
         ]
         encoder.schema = {
             "type": "record",
             "name": "test",
             "fields": [
                 {"name": "ages", "type": union_branches},
             ],
         }
         # (value for "ages", expected encoding), written in this order.
         cases = (
             (25, b"\x002"),
             (None, b"\x02"),
             ([16, 18, 21], b"\x04\x06 $*\x00"),
             ("AGE_ONE", b"\x06\x00"),
             (TextAge.AGE_TWO, b"\x06\x02"),
         )
         for ages, expected in cases:
             assert encoder.write({"ages": ages}) == expected
Example #12
0
 def test_type_null(self):
     """Encoding ``None`` with a null schema produces no bytes."""
     null_schema = {"type": "null"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = null_schema
         assert encoder.write(None) == b""
 def _set_avro_readers(self):
     """Create the deserializers used for reading.

     Sets ``_json_deserializer`` (built from ``_reader_schema``), plus two
     quickavro ``BinaryEncoder`` instances used as decoders:
     ``_quickavro_decoder`` configured with ``_reader_schema_json`` and
     ``_quickavro_writer_decoder`` configured with ``_writer_schema_json``.
     """
     self._json_deserializer = TcJsonDeserializer(self._reader_schema)

     # Each attribute is bound first, then configured via a local alias.
     reader_side = self._quickavro_decoder = quickavro.BinaryEncoder()
     reader_side.schema = self._reader_schema_json

     writer_side = self._quickavro_writer_decoder = quickavro.BinaryEncoder()
     writer_side.schema = self._writer_schema_json
Example #14
0
 def test_type_double(self):
     """Encode a large float with a double schema."""
     double_schema = {"type": "double"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = double_schema
         encoded = encoder.write(1928474837480108311.521837843)
         assert encoded == b"\xf1G\xae\xa5Q\xc3\xbaC"
Example #15
0
 def test_type_float(self):
     """Encode a float value with a float schema."""
     float_schema = {"type": "float"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = float_schema
         encoded = encoder.write(8311.125)
         assert encoded == b"\x80\xdc\x01F"
Example #16
0
 def test_type_int64(self):
     """Encode 4294967295 with a long schema."""
     long_schema = {"type": "long"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = long_schema
         encoded = encoder.write(4294967295)
         assert encoded == b"\xfe\xff\xff\xff\x1f"
Example #17
0
 def test_type_int32(self):
     """Encode 10000 with an int schema."""
     int_schema = {"type": "int"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = int_schema
         encoded = encoder.write(10000)
         assert encoded == b"\xa0\x9c\x01"
Example #18
0
 def test_type_array(self):
     """Encode a two-element list with an array-of-strings schema."""
     array_schema = {"type": "array", "items": "string"}
     items = ["test1", "test2"]
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = array_schema
         assert encoder.write(items) == b"\x04\ntest1\ntest2\x00"
Example #19
0
 def test_type_bytes(self):
     """Encode a bytes value with a bytes schema."""
     bytes_schema = {"type": "bytes"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = bytes_schema
         encoded = encoder.write(b"test")
         assert encoded == b"\x08test"
Example #20
0
 def test_type_map(self):
     """Encode a single-entry dict with a map-of-strings schema."""
     map_schema = {"type": "map", "values": "string"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = map_schema
         encoded = encoder.write({"mykey": "myval"})
         assert encoded == b"\x02\nmykey\nmyval\x00"
Example #21
0
 def test_type_string(self):
     """Encode a str value with a (named) string schema."""
     string_schema = {"name": "name", "type": "string"}
     with quickavro.BinaryEncoder() as encoder:
         encoder.schema = string_schema
         encoded = encoder.write("test")
         assert encoded == b"\x08test"