class TestMessageSerializer(unittest.TestCase):
    """Encode/decode round-trip tests for ``MessageSerializer`` using a mock registry client."""

    def setUp(self):
        # Each test starts from a fresh mock registry client and a
        # serializer constructed around it.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Assert ``message`` carries the expected header and decodes to ``expected``.

        The first five bytes are unpacked as a signed byte (which must be 0)
        followed by a big-endian unsigned int (which must equal
        ``schema_id``). The whole message is then decoded through the
        serializer and compared with ``expected``.
        """
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)

        header = message[0:5]
        magic, sid = struct.unpack('>bI', header)
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)

        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        """Encode by explicit schema id, first for the basic and then the advanced schema."""
        advanced_schema = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)

        advanced_id = self.client.register('test_adv', advanced_schema)
        # The two registrations must not share an id.
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)

    def test_encode_record_for_topic(self):
        """Encoding for topic 'test' must use the id registered under subject 'test-value'."""
        topic = 'test'
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        expected_id = self.client.register('test-value', basic_schema)

        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_for_topic(topic, item)
            self.assertMessageIsSame(encoded, item, expected_id)

    def test_encode_record_with_schema(self):
        """Encoding with an explicit schema must use the id registered under subject 'test-value'."""
        topic = 'test'
        basic_schema = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        expected_id = self.client.register('test-value', basic_schema)

        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema(topic, basic_schema, item)
            self.assertMessageIsSame(encoded, item, expected_id)
class TestMessageSerializer(unittest.TestCase):
    """Checks that records encoded by ``MessageSerializer`` decode back unchanged."""

    def setUp(self):
        # The serializer under test is built on a mock schema registry client.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Verify the 5-byte prefix of ``message`` and its decoded content.

        ``message[0:5]`` is unpacked with the struct format ``'>bI'``; the
        byte must be 0 and the int must equal ``schema_id``. Decoding the
        full message must give a truthy value equal to ``expected``.
        """
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        prefix_fields = struct.unpack('>bI', message[0:5])
        magic, sid = prefix_fields
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        round_tripped = self.ms.decode_message(message)
        self.assertTrue(round_tripped)
        self.assertEqual(round_tripped, expected)

    def _check_round_trips(self, records, schema_id, encode):
        """Encode every record with ``encode`` and check it against ``schema_id``."""
        for record in records:
            self.assertMessageIsSame(encode(record), record, schema_id)

    def test_encode_with_schema_id(self):
        """Records encoded by schema id round-trip for both sample schemas."""
        adv = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        schema_id = self.client.register('test', basic)
        self._check_round_trips(
            data_gen.BASIC_ITEMS,
            schema_id,
            lambda record: self.ms.encode_record_with_schema_id(schema_id, record),
        )

        adv_schema_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        self._check_round_trips(
            data_gen.ADVANCED_ITEMS,
            adv_schema_id,
            lambda record: self.ms.encode_record_with_schema_id(adv_schema_id, record),
        )

    def test_encode_record_for_topic(self):
        """Records encoded for topic 'test' carry the id registered as 'test-value'."""
        topic = 'test'
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        self._check_round_trips(
            data_gen.BASIC_ITEMS,
            schema_id,
            lambda record: self.ms.encode_record_for_topic(topic, record),
        )

    def test_encode_record_with_schema(self):
        """Records encoded with an explicit schema carry the id registered as 'test-value'."""
        topic = 'test'
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        self._check_round_trips(
            data_gen.BASIC_ITEMS,
            schema_id,
            lambda record: self.ms.encode_record_with_schema(topic, basic, record),
        )
def test_select(cluster):
    # type: (ClickHouseCluster) -> None
    """Insert three AvroConfluent-encoded rows into a Memory table and read them back.

    The rows hold the values 0, 1 and 2 in a single ``long`` field named
    ``value``; the select output is expected to list them in that order.
    """
    serializer = MessageSerializer(cluster.schema_registry_client)

    record_schema = avro.schema.make_avsc_object({
        'name': 'test_record',
        'type': 'record',
        'fields': [{
            'name': 'value',
            'type': 'long'
        }]
    })

    # Concatenate the encoded messages back to back, in order 0, 1, 2.
    payload = b"".join(
        serializer.encode_record_with_schema('test_subject', record_schema, {'value': n})
        for n in range(3)
    )

    node = cluster.instances["dummy"]  # type: ClickHouseInstance
    registry_url = "http://{}:{}".format(
        cluster.schema_registry_host, cluster.schema_registry_port)

    run_query(node, "create table avro_data(value Int64) engine = Memory()")
    run_query(
        node,
        "insert into avro_data format AvroConfluent",
        payload,
        {'format_avro_schema_registry_url': registry_url},
    )

    output = run_query(node, "select * from avro_data")
    rows = [str.split(line) for line in output.splitlines()]
    assert rows == [
        ["0"],
        ["1"],
        ["2"],
    ]
def writeToavro(p, mls):
    """Encode one tweet row as an Avro message in preparation for loading into HBase.

    The Avro schema is read from a fixed file, registered with the schema
    registry, and the row is encoded through ``MessageSerializer``. Any
    exception is logged, printed together with the offending row, and
    reported via ``twitter_utils.sendErrorMail`` instead of being raised.

    Args:
        p: Not used by this function; kept so existing callers keep working.
        mls: Indexable row of tweet attributes; indices 0 through 34 are
            read. Count-like and score-like entries are passed through
            ``convert_long``.

    Returns:
        True in every case, whether or not encoding succeeded.
    """
    schema_path = '/root/quest/twitter_avro_schema.avsc'
    registry_url = 'http://192.168.111.12:8081'
    subject = 'twitter_avro__schema_stream4'

    try:
        # Use a context manager so the schema file handle is closed
        # promptly rather than left to the garbage collector.
        with open(schema_path) as schema_file:
            avro_schema = Util.parse_schema_from_string(schema_file.read())

        client = CachedSchemaRegistryClient(url=registry_url)
        schema_id = client.register(subject, avro_schema)
        avro_schema = client.get_by_id(schema_id)
        # The schema actually used for encoding is the one returned as the
        # latest for the subject; the id and version returned with it are
        # not needed here.
        _, avro_schema, _ = client.get_latest_schema(subject)
        # NOTE(review): the result of this lookup is never used; the call is
        # kept only so the sequence of registry requests (and any error it
        # raises) is unchanged. Confirm whether it can be dropped.
        client.get_version(subject, avro_schema)

        serializer = MessageSerializer(client)
        record = {
            "authid": mls[0],
            "screen_name": mls[1],
            "description": mls[2],
            "favourites_count": convert_long(mls[3]),
            "followers_count": convert_long(mls[4]),
            "friends_count": convert_long(mls[5]),
            "listed_count": convert_long(mls[6]),
            "location": mls[7],
            "id_str": mls[8],
            "time_zone": mls[9],
            "statuses_count": convert_long(mls[10]),
            "created_at": mls[11],
            "favorite_count": convert_long(mls[12]),
            "tid": mls[13],
            "in_reply_to_status_id_str": mls[14],
            "in_reply_to_user_id_str": mls[15],
            "lang": mls[16],
            "possibly_sensitive": mls[17],
            "retweet_count": convert_long(mls[18]),
            "text": mls[19],
            "entities_url": mls[20],
            "entities_expanded_url": mls[21],
            "entities_media_url": mls[22],
            "disgust": convert_long(mls[23]),
            "fear": convert_long(mls[24]),
            "sadness": convert_long(mls[25]),
            "surprise": convert_long(mls[26]),
            "trust": convert_long(mls[27]),
            "negative": convert_long(mls[28]),
            "positive": convert_long(mls[29]),
            "neutral": convert_long(mls[30]),
            "celebrities": mls[31],
            "events": mls[32],
            "brands": mls[33],
            "accessories": mls[34],
        }
        # NOTE(review): `topicname` is not defined in this function and is
        # presumably a module-level name -- confirm. The encoded message is
        # not returned or sent anywhere from here.
        serializer.encode_record_with_schema(topicname, avro_schema, record)
    except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
        logging.debug(
            'There was an error in the generation of the avro file. The error is: %s'
            % e)
        # Single-argument print(...) behaves the same as a print statement on
        # Python 2 and as a function on Python 3. The doubled space keeps the
        # output of the former `print '... : ', e` unchanged.
        print('Error in avro generation :  %s' % e)
        print(mls)
        twitter_utils.sendErrorMail(
            'There was an error in the generation of the avro file. \nThe error is %s'
            % e)
    return True