def test_write_union_obj():
    """Round-trip typed record objects through a top-level union schema.

    Writes one ``Rec1`` and one ``Rec2`` instance to an Avro file, then reads
    the file back twice: once reusing the already-created types (records must
    compare equal and the type objects must be the very same ones), and once
    letting the reader create fresh types (type objects and records differ,
    but their dict representations must match).
    """
    schema = '''[{"name": "Rec1", "type": "record", "fields": [ {"name": "attr1", "type": "int"} ] }, {"name": "Rec2", "type": "record", "fields": [ {"name": "attr2", "type": "string"} ]} ]'''

    dirname = tempfile.mkdtemp()
    # try/finally so the scratch directory is removed even when an
    # assertion below fails.
    try:
        filename = os.path.join(dirname, 'test.avro')

        avtypes = pyavroc.create_types(schema)

        assert avtypes.Rec1._fieldtypes == {'attr1': int}
        assert avtypes.Rec2._fieldtypes == {'attr2': str}

        recs = [avtypes.Rec1(attr1=123), avtypes.Rec2(attr2='hello')]

        # Avro container files are binary data, so open in binary mode to
        # avoid any newline translation on platforms that perform it.
        with open(filename, 'wb') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            for rec in recs:
                writer.write(rec)
            writer.close()

        orig_rec1 = avtypes.Rec1
        orig_rec2 = avtypes.Rec2

        # read using existing types
        with open(filename, 'rb') as fp:
            reader = pyavroc.AvroFileReader(fp, types=avtypes)
            read_recs = list(reader)

        assert reader.types.Rec1 is orig_rec1
        assert reader.types.Rec2 is orig_rec2

        assert read_recs == recs

        # read and create new types
        with open(filename, 'rb') as fp:
            reader = pyavroc.AvroFileReader(fp, types=True)
            read_recs = list(reader)

        assert reader.types.Rec1 is not orig_rec1
        assert reader.types.Rec2 is not orig_rec2

        # Freshly created types are distinct classes, so the objects differ
        # even though their contents are the same.
        assert read_recs != recs
        assert _testhelper.objs_to_dicts(read_recs) == _testhelper.objs_to_dicts(recs)
    finally:
        shutil.rmtree(dirname)
def test_write_union_of_dicts():
    """Round-trip plain dicts through a top-level union-of-records schema.

    Writes one dict per union branch and reads the file back with
    ``types=False``, expecting the same list of dicts.
    """
    schema = '''[{"name": "Rec1", "type": "record", "fields": [ {"name": "attr1", "type": "int"} ] }, {"name": "Rec2", "type": "record", "fields": [ {"name": "attr2", "type": "string"} ]} ]'''

    dirname = tempfile.mkdtemp()
    # try/finally so the scratch directory is removed even when the
    # assertion below fails.
    try:
        filename = os.path.join(dirname, 'test.avro')

        recs = [{'attr1': 123}, {'attr2': 'hello'}]

        # Avro container files are binary data, so open in binary mode to
        # avoid any newline translation on platforms that perform it.
        with open(filename, 'wb') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            for rec in recs:
                writer.write(rec)
            writer.close()

        with open(filename, 'rb') as fp:
            reader = pyavroc.AvroFileReader(fp, types=False)
            read_recs = list(reader)

        assert read_recs == recs
    finally:
        shutil.rmtree(dirname)
def test_bad_file_type():
    """Constructing a reader over a non-file object must raise TypeError."""
    # The schema content does not matter here; it only provides a types object.
    placeholder_schema = '''{ "type": "boolean", "name": "x" }'''
    placeholder_types = pyavroc.create_types(placeholder_schema)

    not_a_file = []
    with pytest.raises(TypeError):
        # try to open a reader on a list
        pyavroc.AvroFileReader(not_a_file, types=placeholder_types)
def test_pyavroc(types):
    """Time how long pyavroc takes to read every record of ``filename``.

    ``filename`` is a name taken from the enclosing module scope. ``types`` is
    forwarded unchanged to ``pyavroc.AvroFileReader``.

    Returns a ``(elapsed, count)`` tuple: the ``datetime.timedelta`` spent
    draining the reader and the number of records read.
    """
    print('pyavroc(types=%s): reading file...' % types)
    now = datetime.datetime.now
    with open(filename, 'rb') as stream:
        reader = pyavroc.AvroFileReader(stream, types=types)
        # Only the iteration itself is timed, not opening the file or
        # constructing the reader.
        started = now()
        records = list(reader)
        elapsed = now() - started
    return (elapsed, len(records))
def test_pyavroc_pipe():
    """Time pyavroc reading ``filename`` through the stdout pipe of ``/bin/cat``.

    ``filename`` is a name taken from the enclosing module scope.

    Returns a ``(elapsed, count)`` tuple: the ``datetime.timedelta`` spent
    draining the reader and the number of records read.
    """
    print('pyavroc(via pipe): reading file...')
    cat_command = ['/bin/cat', filename]
    child = subprocess.Popen(cat_command, stdout=subprocess.PIPE)
    reader = pyavroc.AvroFileReader(child.stdout)

    # Only the iteration itself is timed; waiting for the child to exit
    # happens after the second timestamp is taken.
    started = datetime.datetime.now()
    records = list(reader)
    finished = datetime.datetime.now()

    child.wait()
    return (finished - started, len(records))
def test_union_with_bool():
    """Write null, bool and int values to a ``["null", "boolean"]`` field.

    The values written are ``None``, ``True``, ``33`` and ``0``; reading the
    file back is expected to yield ``None``, ``True``, ``True``, ``False``.
    """
    schema = '''{ "type": "record", "name": "Rec", "fields": [ {"name": "attr1", "type": [ "null", "boolean" ]} ] }'''
    av_types = pyavroc.create_types(schema)

    # In order: a null, a real boolean, then two integers. The integers
    # should be coerced to boolean without an error.
    values_to_write = (None, True, 33, 0)
    expected_values = [None, True, True, False]

    with tempfile.NamedTemporaryFile() as tmpfile:
        writer = pyavroc.AvroFileWriter(tmpfile.file, schema)
        for value in values_to_write:
            writer.write(av_types.Rec(attr1=value))
        writer.close()

        # Rewind so the reader starts from the beginning of what was written.
        tmpfile.flush()
        tmpfile.seek(0)

        reader = pyavroc.AvroFileReader(tmpfile.file, types=av_types)
        attr_values = []
        for record in list(reader):
            attr_values.append(record.attr1)
        assert attr_values == expected_values
def test_write_read_empty():
    """A file written with a schema but no records reads back as empty.

    Opens a writer, closes it immediately without writing anything, and then
    checks that a reader over the resulting file yields zero records.
    """
    schema = '''[{"name": "Rec1", "type": "record", "fields": [ {"name": "attr1", "type": "int"} ] }, {"name": "Rec2", "type": "record", "fields": [ {"name": "attr2", "type": "string"} ]} ]'''

    dirname = tempfile.mkdtemp()
    # try/finally so the scratch directory is removed even when the
    # assertion below fails.
    try:
        filename = os.path.join(dirname, 'test.avro')

        avtypes = pyavroc.create_types(schema)

        # Avro container files are binary data, so open in binary mode to
        # avoid any newline translation on platforms that perform it.
        with open(filename, 'wb') as fp:
            writer = pyavroc.AvroFileWriter(fp, schema)
            writer.close()

        # read using existing types
        with open(filename, 'rb') as fp:
            reader = pyavroc.AvroFileReader(fp, types=avtypes)
            read_recs = list(reader)

        assert len(read_recs) == 0
    finally:
        shutil.rmtree(dirname)
def _pyavroc_read(filename, types):
    """Read every record from the Avro file at ``filename`` into a list.

    ``types`` is forwarded unchanged to ``pyavroc.AvroFileReader``.

    The file is opened in binary mode, since Avro container files are binary
    data, and inside a ``with`` block so the handle is closed once all
    records have been read, including when reading raises.
    """
    with open(filename, 'rb') as fp:
        reader = pyavroc.AvroFileReader(fp, types=types)
        return list(reader)