Example #1
0
def test_write_wrong_value():
    """Writing a value of the wrong Python type must raise a descriptive TypeError.

    Two cases are exercised against a union schema of two records:
    a ``str`` stored in an ``int`` field and an ``int`` stored in a
    ``string`` field.  Each expected message names the record field and the
    offending object.

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        # str in an int field
        with pytest.raises(TypeError) as excinfo:
            with open(filename, 'w') as fp:
                writer = pyavroc.AvroFileWriter(fp, schema)
                writer.write(avtypes.Rec1(attr1='x' * 120))
                writer.close()

        # Only the first 99 of the 120 'x' characters are asserted here.
        expected_error = "when writing to Rec1.attr1, invalid python object '" \
                         + ('x' * 99) + ", an integer is required"

        assert expected_error in str(excinfo.value)

        # int in a string field
        with pytest.raises(TypeError) as excinfo:
            with open(filename, 'w') as fp:
                writer = pyavroc.AvroFileWriter(fp, schema)
                writer.write(avtypes.Rec2(attr2=123))
                writer.close()

        expected_error = "when writing to Rec2.attr2, invalid python object 123," \
                         " expected string or Unicode object, int found"

        assert expected_error in str(excinfo.value)
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(dirname)
Example #2
0
def test_bad_file_type():
    """AvroFileReader must raise TypeError when handed a list instead of a file."""
    # The schema content does not matter for this test; it only has to parse.
    placeholder_schema = '''{
        "type": "boolean",
        "name": "x"
        }'''
    reader_types = pyavroc.create_types(placeholder_schema)
    with pytest.raises(TypeError):
        # an empty list stands in for the file object
        pyavroc.AvroFileReader([], types=reader_types)
Example #3
0
def _pyavroc_create_file(filename):
    """Write NRECORDS default-constructed ``User`` records to *filename*.

    The schema comes from the module-level ``json_schema``; the record count
    from the module-level ``NRECORDS``.
    """
    generated = pyavroc.create_types(json_schema)

    with open(filename, 'w') as out:
        avro_out = pyavroc.AvroFileWriter(out, json_schema)

        for _ in range(NRECORDS):
            record = generated.User()
            avro_out.write(record)

        avro_out.close()
Example #4
0
def test_coerce_int_long():
    """A plain Python int written to a ``long`` field must not raise.

    The output is discarded; the test passes as long as no exception is
    raised while writing and closing.
    """
    # Function-scope import so this block does not rely on a module-level one.
    import os

    schema = '''{
        "type": "record",
        "name": "Rec",
        "fields": [ {"name": "attr1", "type": "long"} ]
        }'''
    av_types = pyavroc.create_types(schema)
    rec = av_types.Rec(attr1=33)  # an integer.  Should be coerced to long without an error
    # os.devnull rather than a hard-coded '/dev/null' keeps the path portable.
    with open(os.devnull, 'w') as fp:
        writer = pyavroc.AvroFileWriter(fp, schema)
        writer.write(rec)
        writer.close()
Example #5
0
def test_coerce_int_long():
    """A plain Python int written to a ``long`` field must not raise.

    The output is discarded; the test passes as long as no exception is
    raised while writing and closing.
    """
    # Function-scope import so this block does not rely on a module-level one.
    import os

    schema = '''{
        "type": "record",
        "name": "Rec",
        "fields": [ {"name": "attr1", "type": "long"} ]
        }'''
    av_types = pyavroc.create_types(schema)
    rec = av_types.Rec(attr1=33)  # an integer.  Should be coerced to long without an error
    # os.devnull rather than a hard-coded '/dev/null' keeps the path portable.
    with open(os.devnull, 'w') as fp:
        writer = pyavroc.AvroFileWriter(fp, schema)
        writer.write(rec)
        writer.close()
def test_serialize_record():
    """Serialize ten ``User`` records and check each one after deserializing.

    Only ``name`` and ``office`` are set on each record; the decoded mapping
    is expected to also carry ``favorite_number`` with value ``None``.
    """
    record_count = 10
    user_types = pyavroc.create_types(SCHEMA)
    encoder = pyavroc.AvroSerializer(SCHEMA)
    decoder = Deserializer(SCHEMA)
    expected_keys = {'name', 'office', 'favorite_number'}
    for idx in range(record_count):
        name = "name-%d" % idx
        office = "office-%d" % idx
        payload = encoder.serialize(user_types.User(name=name, office=office))
        decoded = decoder.deserialize(payload)
        assert set(decoded) == expected_keys
        assert decoded['name'] == name
        assert decoded['office'] == office
        assert decoded['favorite_number'] is None
Example #7
0
def test_serialize_record():
    """Serialize ten ``User`` records and check each one after deserializing.

    Only ``name`` and ``office`` are set on each record; the decoded mapping
    is expected to also carry ``favorite_number`` with value ``None``.
    """
    record_count = 10
    user_types = pyavroc.create_types(SCHEMA)
    encoder = pyavroc.AvroSerializer(SCHEMA)
    decoder = Deserializer(SCHEMA)
    expected_keys = {'name', 'office', 'favorite_number'}
    for idx in range(record_count):
        name = "name-%d" % idx
        office = "office-%d" % idx
        payload = encoder.serialize(user_types.User(name=name, office=office))
        decoded = decoder.deserialize(payload)
        assert set(decoded) == expected_keys
        assert decoded['name'] == name
        assert decoded['office'] == office
        assert decoded['favorite_number'] is None
def _pyavroc_create_file(filename):
    """Write two sample ``User`` records ('Alyssa' and 'Ben') to *filename*.

    The schema comes from the module-level ``json_schema``.  The file is
    opened in a ``with`` block so the handle is closed even if a write
    raises.
    """
    avtypes = pyavroc.create_types(json_schema)

    with open(filename, 'w') as fp:
        writer = pyavroc.AvroFileWriter(fp, json_schema)

        # range() instead of xrange(): xrange does not exist on Python 3.
        for _ in range(1):
            writer.write(avtypes.User(name='Alyssa', favorite_number=256))
            writer.write(avtypes.User(name='Ben', favorite_number=7, favorite_color='red'))

        # Close the writer before the underlying file is closed.
        writer.close()
Example #9
0
def _pyavroc_create_file(filename):
    """Write two sample ``User`` records ('Alyssa' and 'Ben') to *filename*.

    The schema comes from the module-level ``json_schema``.  The file is
    opened in a ``with`` block so the handle is closed even if a write
    raises.
    """
    avtypes = pyavroc.create_types(json_schema)

    with open(filename, 'w') as fp:
        writer = pyavroc.AvroFileWriter(fp, json_schema)

        for _ in range(1):
            writer.write(avtypes.User(name='Alyssa', favorite_number=256))
            writer.write(
                avtypes.User(name='Ben', favorite_number=7, favorite_color='red'))

        # Close the writer before the underlying file is closed.
        writer.close()
Example #10
0
def test_write_union_obj():
    """Round-trip one record of each union branch through an Avro file.

    The file is read back twice: once with the existing generated types
    (records compare equal and the reader exposes the same classes), and once
    with ``types=True`` (the reader exposes different classes, so records
    compare unequal but their dict forms match).

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    assert avtypes.Rec1._fieldtypes == {'attr1': int}
    assert avtypes.Rec2._fieldtypes == {'attr2': str}

    recs = [avtypes.Rec1(attr1=123), avtypes.Rec2(attr2='hello')]

    # Remember the generated classes to compare against what each reader exposes.
    orig_rec1 = avtypes.Rec1
    orig_rec2 = avtypes.Rec2

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        with open(filename, 'w') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            for rec in recs:
                writer.write(rec)
            writer.close()

        # read using existing types
        with open(filename) as fp:
            reader = pyavroc.AvroFileReader(fp, types=avtypes)
            read_recs = list(reader)

        assert reader.types.Rec1 is orig_rec1
        assert reader.types.Rec2 is orig_rec2

        assert read_recs == recs

        # read and create new types
        with open(filename) as fp:
            reader = pyavroc.AvroFileReader(fp, types=True)
            read_recs = list(reader)

        assert reader.types.Rec1 is not orig_rec1
        assert reader.types.Rec2 is not orig_rec2

        assert read_recs != recs
        assert _testhelper.objs_to_dicts(read_recs) == _testhelper.objs_to_dicts(recs)
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(dirname)
Example #11
0
def test_write_union_obj():
    """Round-trip one record of each union branch through an Avro file.

    The file is read back twice: once with the existing generated types
    (records compare equal and the reader exposes the same classes), and once
    with ``types=True`` (the reader exposes different classes, so records
    compare unequal but their dict forms match).

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    assert avtypes.Rec1._fieldtypes == {'attr1': int}
    assert avtypes.Rec2._fieldtypes == {'attr2': str}

    recs = [avtypes.Rec1(attr1=123), avtypes.Rec2(attr2='hello')]

    # Remember the generated classes to compare against what each reader exposes.
    orig_rec1 = avtypes.Rec1
    orig_rec2 = avtypes.Rec2

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        with open(filename, 'w') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            for rec in recs:
                writer.write(rec)
            writer.close()

        # read using existing types
        with open(filename) as fp:
            reader = pyavroc.AvroFileReader(fp, types=avtypes)
            read_recs = list(reader)

        assert reader.types.Rec1 is orig_rec1
        assert reader.types.Rec2 is orig_rec2

        assert read_recs == recs

        # read and create new types
        with open(filename) as fp:
            reader = pyavroc.AvroFileReader(fp, types=True)
            read_recs = list(reader)

        assert reader.types.Rec1 is not orig_rec1
        assert reader.types.Rec2 is not orig_rec2

        assert read_recs != recs
        assert _testhelper.objs_to_dicts(read_recs) == _testhelper.objs_to_dicts(recs)
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(dirname)
Example #12
0
def test_write_closed():
    """Writing through an already-closed AvroFileWriter must raise IOError.

    One record is written and both the writer and the file are closed; a
    second write on the same writer object is then expected to fail.

    The file is opened in a ``with`` block and the scratch directory is
    removed in a ``finally`` clause, so nothing is leaked if a step fails.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        with open(filename, 'w') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            writer.write(avtypes.Rec1(attr1=123))
            writer.close()

        # Both the writer and the file are closed at this point.
        with pytest.raises(IOError):
            writer.write(avtypes.Rec1(attr1=456))
    finally:
        shutil.rmtree(dirname)
Example #13
0
def test_write_wrong_type_primitive():
    """AvroSerializer must raise TypeError for primitive fields of the wrong type.

    A correctly typed record is serialized first as a sanity check; then an
    int in the ``string`` field and a str in the ``int`` field are each
    expected to fail.
    """
    schema = '''{
  "type": "record",
  "name": "Obj",
  "fields": [
    {"name": "string", "type": "string"},
    {"name": "number", "type": "int"}
  ]
}'''
    obj_types = pyavroc.create_types(schema)
    encoder = pyavroc.AvroSerializer(schema)

    # well-typed record: must serialize without raising
    encoder.serialize(obj_types.Obj(string="pippo", number=1))

    bad_field_sets = (
        {"string": 1, "number": 1},      # int where a str is required
        {"string": "a", "number": "a"},  # str where an int is required
    )
    for fields in bad_field_sets:
        bad_obj = obj_types.Obj(**fields)
        with pytest.raises(TypeError):
            encoder.serialize(bad_obj)
Example #14
0
def test_write_wrong_type_primitive():
    """AvroSerializer must raise TypeError for primitive fields of the wrong type.

    A correctly typed record is serialized first as a sanity check; then an
    int in the ``string`` field and a str in the ``int`` field are each
    expected to fail.
    """
    schema = '''{
  "type": "record",
  "name": "Obj",
  "fields": [
    {"name": "string", "type": "string"},
    {"name": "number", "type": "int"}
  ]
}'''
    obj_types = pyavroc.create_types(schema)
    encoder = pyavroc.AvroSerializer(schema)

    # well-typed record: must serialize without raising
    encoder.serialize(obj_types.Obj(string="pippo", number=1))

    bad_field_sets = (
        {"string": 1, "number": 1},      # int where a str is required
        {"string": "a", "number": "a"},  # str where an int is required
    )
    for fields in bad_field_sets:
        bad_obj = obj_types.Obj(**fields)
        with pytest.raises(TypeError):
            encoder.serialize(bad_obj)
Example #15
0
def test_write_closed():
    """Writing through an already-closed AvroFileWriter must raise IOError.

    One record is written and both the writer and the file are closed; a
    second write on the same writer object is then expected to fail.

    The file is opened in a ``with`` block and the scratch directory is
    removed in a ``finally`` clause, so nothing is leaked if a step fails.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        with open(filename, 'w') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            writer.write(avtypes.Rec1(attr1=123))
            writer.close()

        # Both the writer and the file are closed at this point.
        with pytest.raises(IOError):
            writer.write(avtypes.Rec1(attr1=456))
    finally:
        shutil.rmtree(dirname)
Example #16
0
def test_union_with_bool():
    """Write null, boolean and integer values to a ``["null", "boolean"]`` field.

    The integers 33 and 0 are expected to be accepted and to read back as
    ``True`` and ``False`` respectively.
    """
    schema = '''{
        "type": "record",
        "name": "Rec",
        "fields": [ {"name": "attr1", "type": [ "null", "boolean" ]} ]
        }'''
    av_types = pyavroc.create_types(schema)
    # null, a real boolean, then two integers that should be coerced to
    # boolean without an error
    values_to_write = (None, True, 33, 0)
    with tempfile.NamedTemporaryFile() as tmpfile:
        writer = pyavroc.AvroFileWriter(tmpfile.file, schema)
        for value in values_to_write:
            writer.write(av_types.Rec(attr1=value))
        writer.close()

        # rewind so the reader starts from the beginning of the same file
        tmpfile.flush()
        tmpfile.seek(0)
        reader = pyavroc.AvroFileReader(tmpfile.file, types=av_types)
        attr_values = [rec.attr1 for rec in reader]
        assert attr_values == [None, True, True, False]
Example #17
0
def test_write_read_empty():
    """A file written with no records must read back as an empty sequence.

    The writer is opened and closed without any ``write`` call; the reader is
    then expected to yield nothing.

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        with open(filename, 'w') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            writer.close()

        # read using existing types
        with open(filename) as fp:
            reader = pyavroc.AvroFileReader(fp, types=avtypes)
            read_recs = list(reader)

        assert len(read_recs) == 0
    finally:
        # Always clean up, even when the assertion above fails.
        shutil.rmtree(dirname)
Example #18
0
def test_union_with_bool():
    """Write null, boolean and integer values to a ``["null", "boolean"]`` field.

    The integers 33 and 0 are expected to be accepted and to read back as
    ``True`` and ``False`` respectively.
    """
    schema = '''{
        "type": "record",
        "name": "Rec",
        "fields": [ {"name": "attr1", "type": [ "null", "boolean" ]} ]
        }'''
    av_types = pyavroc.create_types(schema)
    # null, a real boolean, then two integers that should be coerced to
    # boolean without an error
    values_to_write = (None, True, 33, 0)
    with tempfile.NamedTemporaryFile() as tmpfile:
        writer = pyavroc.AvroFileWriter(tmpfile.file, schema)
        for value in values_to_write:
            writer.write(av_types.Rec(attr1=value))
        writer.close()

        # rewind so the reader starts from the beginning of the same file
        tmpfile.flush()
        tmpfile.seek(0)
        reader = pyavroc.AvroFileReader(tmpfile.file, types=av_types)
        attr_values = [rec.attr1 for rec in reader]
        assert attr_values == [None, True, True, False]
Example #19
0
def test_write_read_empty():
    """A file written with no records must read back as an empty sequence.

    The writer is opened and closed without any ``write`` call; the reader is
    then expected to yield nothing.

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ]}
]'''

    avtypes = pyavroc.create_types(schema)

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        with open(filename, 'w') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            writer.close()

        # read using existing types
        with open(filename) as fp:
            reader = pyavroc.AvroFileReader(fp, types=avtypes)
            read_recs = list(reader)

        assert len(read_recs) == 0
    finally:
        # Always clean up, even when the assertion above fails.
        shutil.rmtree(dirname)
Example #20
0
"fields": [
 {"name": "bar", "type": "int"}
] },
{"name": "Rec",
"type": "record",
"fields": [
 {"name": "foo", "type": "string"},
 {"name": "subrec", "type": "SubRec"}
]},
{"name": "Enu",
"type": "enum",
"symbols": ["ZERO", "ONE", "TWO"]
}
]'''

# Build the Python types for json_schema once at import time; test_reduce and
# test_deepcopy below use this module-level ``avtypes``.
avtypes = pyavroc.create_types(json_schema)


def test_reduce():
    """``__reduce__`` on a record must return ``(record class, field-value tuple)``."""
    record = avtypes.Rec('hello', avtypes.SubRec(123))
    reduced = record.__reduce__()
    expected = (avtypes.Rec, ('hello', avtypes.SubRec(123)))
    assert reduced == expected


def test_deepcopy():
    """``copy.deepcopy`` of a nested record yields a distinct but equal object."""
    original = avtypes.Rec('hello', avtypes.SubRec(123))
    clone = copy.deepcopy(original)

    # a new object ...
    assert clone is not original
    # ... that still compares equal to the source record
    assert clone == original
def test_create_recursive_types():
    """Types built from ``json_schema`` must expose a ``friend`` field on ``User``."""
    user_fieldtypes = pyavroc.create_types(json_schema).User._fieldtypes

    assert 'friend' in user_fieldtypes
    assert 'friend' in avtypes.User._fieldtypes
Example #22
0
def test_write_wrong_value():
    """Writing a value of the wrong Python type must raise a descriptive TypeError.

    Four cases are exercised, one per record in the union schema: a ``str``
    in an ``int`` field, an ``int`` in a ``string`` field, an ``int`` in a
    map field and an ``int`` in an array field.  Each expected message names
    the record field involved.

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    schema = '''[{"name": "Rec1", "type": "record",
"fields": [ {"name": "attr1", "type": "int"} ] },
{"name": "Rec2", "type": "record",
"fields": [ {"name": "attr2", "type": "string"} ] },
{"name": "Rec3", "type": "record",
"fields": [ {"name": "attr3", "type": {"type": "map", "values": "int"}} ] },
{"name": "Rec4", "type": "record",
"fields": [ {"name": "attr4", "type": {"type": "array", "items": "int"}} ] }
]'''

    avtypes = pyavroc.create_types(schema)

    dirname = tempfile.mkdtemp()
    try:
        filename = os.path.join(dirname, 'test.avro')

        # str in an int field
        with pytest.raises(TypeError) as excinfo:
            with open(filename, 'w') as fp:
                writer = pyavroc.AvroFileWriter(fp, schema)
                writer.write(avtypes.Rec1(attr1='x' * 120))
                writer.close()

        # Two message forms are accepted: one with the first 99 'x'
        # characters, one with all 120 followed by the closing quote.
        expected_error = "when writing to Rec1.attr1, invalid python object '" \
                         + ('x' * 99) + ", an integer is required"
        expected_error2 = "when writing to Rec1.attr1, invalid python object '" \
                          + ('x' * 120) + "', an integer is required"

        assert expected_error in str(excinfo.value) \
            or expected_error2 in str(excinfo.value)

        # int in a string field
        with pytest.raises(TypeError) as excinfo:
            with open(filename, 'w') as fp:
                writer = pyavroc.AvroFileWriter(fp, schema)
                writer.write(avtypes.Rec2(attr2=123))
                writer.close()

        # Used as a regular expression: '.*' allows the wording around
        # 'Unicode' to vary.
        expected_error = "when writing to Rec2.attr2, invalid python object 123," \
                         " expected.*Unicode.*, int found"

        assert re.search(expected_error, str(excinfo.value))

        # int in a map field
        with pytest.raises(TypeError) as excinfo:
            with open(filename, 'w') as fp:
                writer = pyavroc.AvroFileWriter(fp, schema)
                writer.write(avtypes.Rec3(attr3=123))
                writer.close()

        expected_error = "when writing to Rec3.attr3, expected dict-like object, " \
                         "int found"

        assert expected_error in str(excinfo.value)

        # int in an array field
        with pytest.raises(TypeError) as excinfo:
            with open(filename, 'w') as fp:
                writer = pyavroc.AvroFileWriter(fp, schema)
                writer.write(avtypes.Rec4(attr4=123))
                writer.close()

        expected_error = "when writing to Rec4.attr4, expected list, int found"

        assert expected_error in str(excinfo.value)
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(dirname)
Example #23
0
def test_big():
    """Serialize a ``User`` whose ``name`` and ``office`` are 10 * 1024 * 1024 characters each.

    Nothing is asserted; the test passes if serialization does not raise.
    """
    big_len = 10 * 1024 * 1024
    user_types = pyavroc.create_types(SCHEMA)
    encoder = pyavroc.AvroSerializer(SCHEMA)
    huge_text = 'X' * big_len
    big_user = user_types.User(name=huge_text, office=huge_text)
    encoder.serialize(big_user)
Example #24
0
"fields": [
 {"name": "bar", "type": "int"}
] },
{"name": "Rec",
"type": "record",
"fields": [
 {"name": "foo", "type": "string"},
 {"name": "subrec", "type": "SubRec"}
]},
{"name": "Enu",
"type": "enum",
"symbols": ["ZERO", "ONE", "TWO"]
}
]'''

# Build the Python types for json_schema once at import time; test_reduce and
# test_deepcopy below use this module-level ``avtypes``.
avtypes = pyavroc.create_types(json_schema)


def test_reduce():
    """``__reduce__`` on a record must return ``(record class, field-value tuple)``."""
    record = avtypes.Rec('hello', avtypes.SubRec(123))
    reduced = record.__reduce__()
    expected = (avtypes.Rec, ('hello', avtypes.SubRec(123)))
    assert reduced == expected


def test_deepcopy():
    """``copy.deepcopy`` of a nested record yields a distinct but equal object."""
    original = avtypes.Rec('hello', avtypes.SubRec(123))
    clone = copy.deepcopy(original)

    # a new object ...
    assert clone is not original
    # ... that still compares equal to the source record
    assert clone == original
Example #25
0
def test_big():
    """Serialize a ``User`` whose ``name`` and ``office`` are 10 * 1024 * 1024 characters each.

    Nothing is asserted; the test passes if serialization does not raise.
    """
    big_len = 10 * 1024 * 1024
    user_types = pyavroc.create_types(SCHEMA)
    encoder = pyavroc.AvroSerializer(SCHEMA)
    huge_text = 'X' * big_len
    big_user = user_types.User(name=huge_text, office=huge_text)
    encoder.serialize(big_user)
Example #26
0
def test_create_recursive_types():
    """Types built from ``json_schema`` must expose a ``friend`` field on ``User``."""
    user_fieldtypes = pyavroc.create_types(json_schema).User._fieldtypes

    assert 'friend' in user_fieldtypes