def test_append(self):
    print ''
    print 'TEST APPEND'
    print '==========='
    print ''
    correct = 0
    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
        for codec in CODECS_TO_VALIDATE:
            if codec == 'snappy':
                try:
                    import snappy
                except:
                    print 'Snappy not present. Skipping.'
                    correct += 1
                    continue
            print ''
            print 'SCHEMA NUMBER %d' % (i + 1)
            print '================'
            print ''
            print 'Schema: %s' % example_schema
            print 'Datum: %s' % datum
            print 'Codec: %s' % codec

            # write data in binary to file once
            writer = open(FILENAME, 'wb')
            datum_writer = io.DatumWriter()
            schema_object = schema.parse(example_schema)
            dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec)
            dfw.append(datum)
            dfw.close()

            # open file, write, and close nine times
            for i in range(9):
                writer = open(FILENAME, 'ab+')
                dfw = datafile.DataFileWriter(writer, io.DatumWriter())
                dfw.append(datum)
                dfw.close()

            # read data in binary from file
            reader = open(FILENAME, 'rb')
            datum_reader = io.DatumReader()
            dfr = datafile.DataFileReader(reader, datum_reader)
            appended_data = []
            for datum in dfr:
                appended_data.append(datum)

            print 'Appended Data: %s' % appended_data
            print 'Appended Data Length: %d' % len(appended_data)
            is_correct = [datum] * 10 == appended_data
            if is_correct:
                correct += 1
            print 'Correct Appended: %s' % is_correct
            print ''
    os.remove(FILENAME)
    self.assertEquals(correct, len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
def testAppend(self):
    correct = 0
    codecs_to_validate = get_codecs_to_validate()
    for iexample, (writer_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
        for codec in codecs_to_validate:
            file_path = self.NewTempFile()

            logging.debug(
                'Performing append with codec %r in file %s for example #%d\n'
                'Writing datum: %r using writer schema:\n%s',
                codec, file_path, iexample, datum, writer_schema)

            logging.debug('Creating data file %r', file_path)
            with open(file_path, 'wb') as writer:
                datum_writer = io.DatumWriter()
                schema_object = schema.parse(writer_schema)
                with datafile.DataFileWriter(
                    writer=writer,
                    datum_writer=datum_writer,
                    writer_schema=schema_object,
                    codec=codec,
                ) as dfw:
                    dfw.append(datum)

            logging.debug('Appending data to %r', file_path)
            for i in range(9):
                with open(file_path, 'ab+') as writer:
                    with datafile.DataFileWriter(writer, io.DatumWriter()) as dfw:
                        dfw.append(datum)

            logging.debug('Reading appended data from %r', file_path)
            with open(file_path, 'rb') as reader:
                datum_reader = io.DatumReader()
                with datafile.DataFileReader(reader, datum_reader) as dfr:
                    appended_data = list(dfr)

            logging.debug(
                'Appended data has %d items: %r',
                len(appended_data), appended_data)

            if ([datum] * 10) == appended_data:
                correct += 1
            else:
                logging.error(
                    'Appended data does not match:\n'
                    'Expect: %r\n'
                    'Actual: %r',
                    [datum] * 10, appended_data)

    self.assertEqual(
        correct,
        len(codecs_to_validate) * len(SCHEMAS_TO_VALIDATE))
def obtain_df_writer(filename):
    """Return a DataFileWriter object that writes records to an .avro file."""
    return datafile.DataFileWriter(
        open(filename, 'wb'),
        rec_writer,
        writers_schema=SCHEMA
    )
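# A hypothetical usage sketch (not part of the original source): it assumes
# the module-level SCHEMA and rec_writer globals that obtain_df_writer()
# relies on, plus an example schema file 'pair.avsc' and a matching datum.
SCHEMA = schema.parse(open('pair.avsc').read())
rec_writer = io.DatumWriter()

df_writer = obtain_df_writer('records.avro')
df_writer.append({'left': 'L', 'right': 'R'})  # datum must conform to SCHEMA
df_writer.close()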
def test_context_manager(self):
    # Context managers were introduced as a first-class
    # member only in Python 2.6 and above.
    import sys
    if sys.version_info < (2, 6):
        print 'Skipping context manager tests on this Python version.'
        return

    # Test the writer with a 'with' statement.
    writer = open(FILENAME, 'wb')
    datum_writer = io.DatumWriter()
    sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
    schema_object = schema.parse(sample_schema)
    with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
        dfw.append(sample_datum)
    self.assertTrue(writer.closed)

    # Test the reader with a 'with' statement.
    datums = []
    reader = open(FILENAME, 'rb')
    datum_reader = io.DatumReader()
    with datafile.DataFileReader(reader, datum_reader) as dfr:
        for datum in dfr:
            datums.append(datum)
    self.assertTrue(reader.closed)
def process_files(output_path, hdfs_path, batch):
    """Process all files in a batch and produce an Avro file."""
    now = datetime.datetime.now()
    ts = now.strftime("%Y-%m-%d-%H-%M-%S-%f")
    output_filename = FILE_PREFIX + "-" + ts + '.avro'
    print "* creating new avro file: " + output_filename

    xschema = schema.parse(open(SCHEMA_FILE).read())
    rec_writer = io.DatumWriter(xschema)
    df_writer = datafile.DataFileWriter(
        open(output_path + output_filename, 'wb'),
        rec_writer,
        writers_schema=xschema,
        codec='deflate')

    for file_path in batch:
        bytes = read_binary(file_path)
        content = base64.b64encode(bytes)
        data = {}
        data['doc_uuid'] = str(uuid.uuid4())
        data['file_path'] = file_path
        data['content'] = content
        df_writer.append(data)

    df_writer.close()
    time.sleep(1)
    hdfs_put(output_path + output_filename, hdfs_path)
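# read_binary() is not shown in this snippet; a minimal sketch of what it is
# assumed to do (return a file's raw bytes so they can be base64-encoded):
def read_binary(file_path):
    with open(file_path, 'rb') as f:
        return f.read()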
def sample(args):
    """
    Select a random sample of the records from the input files and write
    them into an output file. This command assumes that all the input
    files have the same schema.

    Arguments:
      infiles: Input files
      outfile: Output file
      sample_ratio: Ratio of records selected (0 <= ratio <= 1).
      codec: Compression codec for the output
    """
    # Get the schema from the first file.
    json_schema = args.infiles[0].meta[datafile.SCHEMA_KEY]
    writers_schema = schema.parse(json_schema)
    rec_writer = io.DatumWriter()
    writer = datafile.DataFileWriter(args.outfile, rec_writer,
                                     writers_schema=writers_schema,
                                     codec=args.out_codec)
    for infile in args.infiles:
        try:
            for record in infile:
                if args.sample_ratio >= random.random():
                    writer.append(record)
        except:
            print >> sys.stderr, "Error reading file. Skipping", infile
            logging.exception('Error reading input file: %s' % infile)
            continue
    # Flush the last block and close the output file.
    writer.close()
def cat(args):
    """
    Concatenate files and store the result in an output file. It assumes
    that all the input files have the same schema.
    """
    # Get the schema from the first file.
    json_schema = args.infiles[0].meta[datafile.SCHEMA_KEY]
    writers_schema = schema.parse(json_schema)
    rec_writer = io.DatumWriter()
    writer = datafile.DataFileWriter(args.outfile, rec_writer,
                                     writers_schema=writers_schema,
                                     codec=args.out_codec)
    for infile in args.infiles:
        try:
            for record in infile:
                writer.append(record)
        except:
            print >> sys.stderr, "Error reading file. Skipping", infile
            logging.exception('Error reading input file: %s' % infile)
            continue
    writer.close()
def main():
    # Check the number of arguments.
    if len(sys.argv) != 3:
        sys.exit('Usage %s <Schema file> <Data_file>' % (sys.argv[0]))

    # Read the schema from the .avsc file.
    schema_string = open(sys.argv[1], "r").read()

    # Open the Avro output file.
    avro_file = open(sys.argv[2], "wb")

    # Create a DatumWriter object.
    datum_writer = io.DatumWriter()

    # Parse the schema.
    schema_object = schema.parse(schema_string)

    # Create a DataFileWriter object.
    data_file_writer = datafile.DataFileWriter(avro_file, datum_writer, schema_object)

    # Append one record per line of standard input.
    for line in sys.stdin:
        (left, right) = line[:-1].split(",")
        data_file_writer.append({'left': left, "right": right})

    # Close the DataFileWriter.
    data_file_writer.close()
def write(opts, files):
    if not opts.schema:
        raise AvroError('No schema specified')

    input_type = opts.input_type or guess_input_type(files)
    if not input_type:
        raise AvroError('Cannot guess input file type (not .json or .csv)')

    try:
        with open(opts.schema, 'rt') as f:
            json_schema = f.read()
        writer_schema = schema.parse(json_schema)
        out = _open(opts.output, 'wb')
    except (IOError, OSError) as e:
        raise AvroError('Cannot open file - %s' % e)

    record_parser_map = {
        'json': iter_json,
        'csv': iter_csv,
    }

    with datafile.DataFileWriter(
        writer=out,
        datum_writer=avro_io.DatumWriter(),
        writer_schema=writer_schema,
    ) as writer:
        iter_records = record_parser_map[input_type]
        for filename in (files or ['-']):
            reader = _open(filename, 'rt')
            for record in iter_records(reader, writer_schema):
                writer.append(record)
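# The record parsers referenced above (iter_json, iter_csv) are defined
# elsewhere. A rough sketch of what a JSON-lines parser matching the call
# site iter_records(reader, writer_schema) might look like (an assumption,
# not the tool's actual implementation): it yields one decoded record per
# non-empty input line and does not use the schema argument.
import json

def iter_json(reader, writer_schema):
    for line in reader:
        line = line.strip()
        if line:
            yield json.loads(line)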
def test_container(self):
    writer = open('data.avro', 'wb')
    datum_writer = io.DatumWriter()
    schema_object = schema.parse("""\
{
  "type": "record",
  "name": "StringPair",
  "doc": "A pair of strings.",
  "fields": [
    {"name": "left", "type": "string"},
    {"name": "right", "type": "string"}
  ]
}
""")
    dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
    datum = {'left': 'L', 'right': 'R'}
    dfw.append(datum)
    dfw.close()

    reader = open('data.avro', 'rb')
    datum_reader = io.DatumReader()
    dfr = datafile.DataFileReader(reader, datum_reader)
    data = []
    for datum in dfr:
        data.append(datum)

    self.assertEquals(1, len(data))
    self.assertEquals(datum, data[0])
def testMetadata(self):
    file_path = self.NewTempFile()

    # Test the writer with a 'with' statement.
    with open(file_path, 'wb') as writer:
        datum_writer = io.DatumWriter()
        sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
        schema_object = schema.parse(sample_schema)
        with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
            dfw.SetMeta('test.string', 'foo')
            dfw.SetMeta('test.number', '1')
            dfw.append(sample_datum)
    self.assertTrue(writer.closed)

    # Test the reader with a 'with' statement.
    datums = []
    with open(file_path, 'rb') as reader:
        datum_reader = io.DatumReader()
        with datafile.DataFileReader(reader, datum_reader) as dfr:
            self.assertEqual(b'foo', dfr.GetMeta('test.string'))
            self.assertEqual(b'1', dfr.GetMeta('test.number'))
            for datum in dfr:
                datums.append(datum)
    self.assertTrue(reader.closed)
def main():
    if len(sys.argv) < 2:
        print "Usage: cat input.json | python2.7 JSONtoAvro.py output"
        return

    s = schema.parse(open("tweet.avsc").read())
    f = open(sys.argv[1], "wb")

    writer = datafile.DataFileWriter(f, io.DatumWriter(), s, codec='deflate')

    failed = 0

    for line in sys.stdin:
        line = line.strip()

        try:
            data = json.loads(line)
        except ValueError as detail:
            continue

        try:
            writer.append(data)
        except io.AvroTypeException as detail:
            print line
            failed += 1

    writer.close()

    print str(failed) + " failed in schema"
def test_view_avro():
    cluster = pseudo_hdfs4.shared_cluster()
    try:
        c = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)
        if cluster.fs.isdir("/test-avro-filebrowser"):
            cluster.fs.rmtree('/test-avro-filebrowser/')

        cluster.fs.mkdir('/test-avro-filebrowser/')

        test_schema = schema.parse("""
          {
            "name": "test",
            "type": "record",
            "fields": [
              { "name": "name", "type": "string" },
              { "name": "integer", "type": "int" }
            ]
          }
        """)

        f = cluster.fs.open('/test-avro-filebrowser/test-view.avro', "w")
        data_file_writer = datafile.DataFileWriter(f, io.DatumWriter(),
                                                   writers_schema=test_schema,
                                                   codec='deflate')
        dummy_datum = {
            'name': 'Test',
            'integer': 10,
        }
        data_file_writer.append(dummy_datum)
        data_file_writer.close()

        # autodetect
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view.avro')
        # (Note: we use eval here because of an incompatibility issue between
        # the representation string of JSON dicts in simplejson vs. json)
        assert_equal(eval(response.context['view']['contents']), dummy_datum)

        # offsetting should work as well
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view.avro?offset=1')
        assert_equal('avro', response.context['view']['compression'])

        f = cluster.fs.open('/test-avro-filebrowser/test-view2.avro', "w")
        f.write("hello")
        f.close()

        # we shouldn't autodetect non-Avro files
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view2.avro')
        assert_equal(response.context['view']['contents'], "hello")

        # we should fail gracefully if compression is requested when it's not set
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view2.avro?compression=gzip')
        assert_true('Failed to decompress' in response.context['message'])
    finally:
        try:
            cluster.fs.rmtree('/test-avro-filebrowser/')
        except:
            pass  # Don't let cleanup errors mask earlier failures
def write(interop_schema, writer, codec):
    datum_writer = io.DatumWriter()
    dfw = datafile.DataFileWriter(writer, datum_writer, interop_schema, codec=codec)
    dfw.append(DATUM)
    dfw.close()
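# A minimal companion reader sketch (an assumption, not part of the original
# source): `file_path` is a hypothetical argument; it reads back a container
# file produced by write() above and returns the decoded records.
def read(file_path):
    with datafile.DataFileReader(open(file_path, 'rb'), io.DatumReader()) as dfr:
        return list(dfr)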
def write_avro_file(self, file_object, schema, rec_creator, n_samples, sync_interval):
    avdf.SYNC_INTERVAL = sync_interval
    self.assertEqual(avdf.SYNC_INTERVAL, sync_interval)
    writer = avdf.DataFileWriter(file_object, DatumWriter(), schema)
    for i in xrange(n_samples):
        writer.append(rec_creator(i))
    writer.close()
def write_avro_file(self, rec_creator, n_samples, sync_interval):
    avdf.SYNC_INTERVAL = sync_interval
    self.assertEqual(avdf.SYNC_INTERVAL, sync_interval)
    fo = self._mkf('data.avro', mode='wb')
    with avdf.DataFileWriter(fo, DatumWriter(), self.schema) as writer:
        for i in range(n_samples):
            writer.append(rec_creator(i))
    return fo.name
def test_view_snappy_compressed_avro():
    if not snappy_installed():
        raise SkipTest
    import snappy

    cluster = pseudo_hdfs4.shared_cluster()
    finish = []
    try:
        c = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)
        if cluster.fs.isdir("/test-snappy-avro-filebrowser"):
            cluster.fs.rmtree('/test-snappy-avro-filebrowser/')

        cluster.fs.mkdir('/test-snappy-avro-filebrowser/')

        test_schema = schema.parse("""
          {
            "name": "test",
            "type": "record",
            "fields": [
              { "name": "name", "type": "string" },
              { "name": "integer", "type": "int" }
            ]
          }
        """)

        # Cannot use StringIO with the datafile writer!
        f = cluster.fs.open('/test-snappy-avro-filebrowser/test-view.compressed.avro', "w")
        data_file_writer = datafile.DataFileWriter(f, io.DatumWriter(),
                                                   writers_schema=test_schema,
                                                   codec='snappy')
        dummy_datum = {
            'name': 'Test',
            'integer': 10,
        }
        data_file_writer.append(dummy_datum)
        data_file_writer.close()
        f.close()

        # Check that snappy is the codec recorded in the file
        f = cluster.fs.open('/test-snappy-avro-filebrowser/test-view.compressed.avro', "r")
        assert_true('snappy' in f.read())
        f.close()

        # Viewing the snappy-compressed file should succeed
        response = c.get('/filebrowser/view/test-snappy-avro-filebrowser/test-view.compressed.avro')
        assert_equal('avro', response.context['view']['compression'])
        assert_equal(eval(response.context['view']['contents']), dummy_datum, response)
    finally:
        for done in finish:
            done()
        try:
            cluster.fs.rmtree('/test-snappy-avro-filebrowser/')
        except:
            pass  # Don't let cleanup errors mask earlier failures
def test_write_data(self):
    writer = open('pairs.avro', 'wb')
    datum_writer = io.DatumWriter()
    schema_object = schema.Parse(open('Pair.avsc').read())
    dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
    dfw.append({'left': 'a', 'right': '1'})
    dfw.append({'left': 'c', 'right': '2'})
    dfw.append({'left': 'b', 'right': '3'})
    dfw.append({'left': 'b', 'right': '2'})
    dfw.close()
def generate(schema_file, output_path):
    interop_schema = schema.Parse(open(schema_file, 'r').read())
    datum_writer = io.DatumWriter()
    for codec in datafile.VALID_CODECS:
        filename = 'py3'
        if codec != 'null':
            filename += '_' + codec
        with Path(output_path, filename).with_suffix('.avro').open('wb') as writer, \
             datafile.DataFileWriter(writer, datum_writer, interop_schema, codec) as dfw:
            dfw.append(DATUM)
def init_avro(self, output_path, part_id, schema_path):
    output_dir = None
    output_dirtmp = None  # Handle Avro Write Error
    if type(output_path) is str:
        output_dir = self.init_directory(output_path)
        output_dirtmp = self.init_directory(output_path + 'tmp')  # Handle Avro Write Error
    out_filename = '%(output_dir)s/part-%(part_id)s.avro' % \
        {"output_dir": output_dir, "part_id": str(part_id)}
    out_filenametmp = '%(output_dirtmp)s/part-%(part_id)s.avro' % \
        {"output_dirtmp": output_dirtmp, "part_id": str(part_id)}  # Handle Avro Write Error

    self.schema = open(schema_path, 'r').read()
    email_schema = schema.parse(self.schema)
    rec_writer = io.DatumWriter(email_schema)
    self.avro_writer = datafile.DataFileWriter(
        open(out_filename, 'wb'),
        rec_writer,
        email_schema)
    # Create a temporary AvroWriter that can be used to work around the
    # UnicodeDecodeError when writing into AvroStorage.
    self.avro_writertmp = datafile.DataFileWriter(
        open(out_filenametmp, 'wb'),
        rec_writer,
        email_schema)
def init_avro(self, output_path, part_id, schema_path):
    output_dir = None
    if type(output_path) is str:
        output_dir = self.init_directory(output_path)
    out_filename = '%(output_dir)s/part-%(part_id)s.avro' % \
        {"output_dir": output_dir, "part_id": str(part_id)}

    self.schema = open(schema_path, 'r').read()
    email_schema = schema.parse(self.schema)
    rec_writer = io.DatumWriter(email_schema)
    self.avro_writer = datafile.DataFileWriter(
        open(out_filename, 'wb'),
        rec_writer,
        email_schema)
def write_avro_file(args, outsource='args.avro'):
    SCHEMA = schema.parse(makeSchema(args))
    rec_writer = io.DatumWriter(SCHEMA)

    if outsource == sys.stdout:
        df_writer = datafile.DataFileWriter(sys.stdout, rec_writer,
                                            writers_schema=SCHEMA,
                                            codec='deflate')
    else:
        df_writer = datafile.DataFileWriter(open(outsource, 'wb'), rec_writer,
                                            writers_schema=SCHEMA,
                                            codec='deflate')

    data = {}
    count = 1
    data['size'] = len(args)
    for arg in args:
        if type(arg) == tuple:
            arg = tupleToList(arg)
        data["arg%s" % (count)] = arg
        count += 1

    df_writer.append(data)
    df_writer.close()
def testRoundTrip(self):
    correct = 0
    codecs_to_validate = get_codecs_to_validate()
    for iexample, (writer_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
        for codec in codecs_to_validate:
            file_path = self.NewTempFile()

            # Write the datum this many times in the data file:
            nitems = 10

            logging.debug(
                'Performing round-trip with codec %r in file %s for example #%d\n'
                'Writing datum: %r using writer schema:\n%s',
                codec, file_path, iexample, datum, writer_schema)

            logging.debug('Creating data file %r', file_path)
            with open(file_path, 'wb') as writer:
                datum_writer = io.DatumWriter()
                schema_object = schema.parse(writer_schema)
                with datafile.DataFileWriter(
                    writer=writer,
                    datum_writer=datum_writer,
                    writer_schema=schema_object,
                    codec=codec,
                ) as dfw:
                    for _ in range(nitems):
                        dfw.append(datum)

            logging.debug('Reading data from %r', file_path)
            with open(file_path, 'rb') as reader:
                datum_reader = io.DatumReader()
                with datafile.DataFileReader(reader, datum_reader) as dfr:
                    round_trip_data = list(dfr)

            logging.debug(
                'Round-trip data has %d items: %r',
                len(round_trip_data), round_trip_data)

            if ([datum] * nitems) == round_trip_data:
                correct += 1
            else:
                logging.error(
                    'Round-trip data does not match:\n'
                    'Expect: %r\n'
                    'Actual: %r',
                    [datum] * nitems, round_trip_data)

    self.assertEqual(
        correct,
        len(codecs_to_validate) * len(SCHEMAS_TO_VALIDATE))
def test_write_data(self):
    writer = open('pairs.avro', 'wb')
    datum_writer = io.DatumWriter()
    schema_object = schema.parse(
        open(
            '/Users/tom/workspace/hadoop-book-avro/src/main/java/Pair.avsc'
        ).read())
    dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
    dfw.append({'left': 'a', 'right': '1'})
    dfw.append({'left': 'c', 'right': '2'})
    dfw.append({'left': 'b', 'right': '3'})
    dfw.append({'left': 'b', 'right': '2'})
    dfw.close()
def test_empty_datafile(self):
    """A reader should not fail to read a file consisting of a single empty block."""
    sample_schema = schema.parse(SCHEMAS_TO_VALIDATE[1][0])
    with datafile.DataFileWriter(open(FILENAME, 'wb'), io.DatumWriter(),
                                 sample_schema) as dfw:
        dfw.flush()
        # Write an empty block
        dfw.encoder.write_long(0)
        dfw.encoder.write_long(0)
        dfw.writer.write(dfw.sync_marker)

    with datafile.DataFileReader(open(FILENAME, 'rb'), io.DatumReader()) as dfr:
        self.assertEqual([], list(dfr))
def test_write_data(self):
    writer = open('pairs.avro', 'wb')
    datum_writer = io.DatumWriter()
    schema_object = schema.Parse(
        open(
            '/Users/zzy/Docs/hadoop_book/ch12-avro/src/main/resources/StringPair.avsc'
        ).read())
    dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
    dfw.append({'left': 'a', 'right': '1'})
    dfw.append({'left': 'c', 'right': '2'})
    dfw.append({'left': 'b', 'right': '3'})
    dfw.append({'left': 'b', 'right': '2'})
    dfw.close()
def merge_output_records_to_file(records):
    bio = BytesIO()
    schema = avs.Parse(json.dumps(output_schema))
    writer = aio.DatumWriter()
    # The records are already binary-encoded Avro data, so bypass datum
    # encoding and have append() write the raw bytes straight through.
    writer.write = lambda datum, encoder: encoder.write(datum)
    dw = adf.DataFileWriter(bio, writer, schema)
    for r in records:
        dw.append(r)
    dw.flush()
    return bio.getvalue()
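# A minimal reading sketch (an assumption, not part of the original source):
# it decodes the container bytes returned by merge_output_records_to_file(),
# assuming the appended records were valid encodings of output_schema.
def read_merged_records(avro_bytes):
    with adf.DataFileReader(BytesIO(avro_bytes), aio.DatumReader()) as reader:
        return list(reader)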
def test_round_trip(self):
    print ''
    print 'TEST ROUND TRIP'
    print '==============='
    print ''
    correct = 0
    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
        for codec in CODECS_TO_VALIDATE:
            if codec == 'snappy':
                try:
                    import snappy
                except:
                    print 'Snappy not present. Skipping.'
                    correct += 1
                    continue
            print ''
            print 'SCHEMA NUMBER %d' % (i + 1)
            print '================'
            print ''
            print 'Schema: %s' % example_schema
            print 'Datum: %s' % datum
            print 'Codec: %s' % codec

            # write data in binary to file 10 times
            writer = open(FILENAME, 'wb')
            datum_writer = io.DatumWriter()
            schema_object = schema.parse(example_schema)
            dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec)
            for i in range(10):
                dfw.append(datum)
            dfw.close()

            # read data in binary from file
            reader = open(FILENAME, 'rb')
            datum_reader = io.DatumReader()
            dfr = datafile.DataFileReader(reader, datum_reader)
            round_trip_data = []
            for datum in dfr:
                round_trip_data.append(datum)

            print 'Round Trip Data: %s' % round_trip_data
            print 'Round Trip Data Length: %d' % len(round_trip_data)
            is_correct = [datum] * 10 == round_trip_data
            if is_correct:
                correct += 1
            print 'Correct Round Trip: %s' % is_correct
            print ''
    os.remove(FILENAME)
    self.assertEquals(correct, len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
def test_view_snappy_compressed_avro(self):
    if not snappy_installed():
        raise SkipTest
    import snappy

    finish = []
    try:
        prefix = self.cluster.fs_prefix + '/test-snappy-avro-filebrowser'
        self.cluster.fs.mkdir(prefix)

        test_schema = schema.parse("""
          {
            "name": "test",
            "type": "record",
            "fields": [
              { "name": "name", "type": "string" },
              { "name": "integer", "type": "int" }
            ]
          }
        """)

        # Cannot use StringIO with the datafile writer!
        f = self.cluster.fs.open(prefix + '/test-view.compressed.avro', "w")
        data_file_writer = datafile.DataFileWriter(f, io.DatumWriter(),
                                                   writers_schema=test_schema,
                                                   codec='snappy')
        dummy_datum = {
            'name': 'Test',
            'integer': 10,
        }
        data_file_writer.append(dummy_datum)
        data_file_writer.close()
        f.close()

        # Check that snappy is the codec recorded in the file
        f = self.cluster.fs.open(prefix + '/test-view.compressed.avro', "r")
        assert_true('snappy' in f.read())
        f.close()

        # Viewing the snappy-compressed file should succeed
        response = self.c.get('/filebrowser/view=%s/test-view.compressed.avro' % prefix)
        assert_equal('avro', response.context['view']['compression'])
        assert_equal(eval(response.context['view']['contents']), dummy_datum, response)
    finally:
        for done in finish:
            done()
def test_view_avro(self):
    prefix = self.cluster.fs_prefix + '/test_view_avro'
    self.cluster.fs.mkdir(prefix)

    test_schema = schema.parse("""
      {
        "name": "test",
        "type": "record",
        "fields": [
          { "name": "name", "type": "string" },
          { "name": "integer", "type": "int" }
        ]
      }
    """)

    f = self.cluster.fs.open(prefix + '/test-view.avro', "w")
    data_file_writer = datafile.DataFileWriter(f, io.DatumWriter(),
                                               writers_schema=test_schema,
                                               codec='deflate')
    dummy_datum = {
        'name': 'Test',
        'integer': 10,
    }
    data_file_writer.append(dummy_datum)
    data_file_writer.close()

    # autodetect
    response = self.c.get('/filebrowser/view=%s/test-view.avro' % prefix)
    # (Note: we use eval here because of an incompatibility issue between
    # the representation string of JSON dicts in simplejson vs. json)
    assert_equal(eval(response.context['view']['contents']), dummy_datum)

    # offsetting should work as well
    response = self.c.get('/filebrowser/view=%s/test-view.avro?offset=1' % prefix)
    assert_equal('avro', response.context['view']['compression'])

    f = self.cluster.fs.open(prefix + '/test-view2.avro', "w")
    f.write("hello")
    f.close()

    # we shouldn't autodetect non-Avro files
    response = self.c.get('/filebrowser/view=%s/test-view2.avro' % prefix)
    assert_equal(response.context['view']['contents'], "hello")

    # we should fail gracefully if compression is requested when it's not set
    response = self.c.get('/filebrowser/view=%s/test-view2.avro?compression=gzip' % prefix)
    assert_true('Failed to decompress' in response.context['message'])