class TestMessageSerializer(unittest.TestCase):
    """Encode/decode round-trip tests for ``MessageSerializer``.

    Every test registers a schema with a ``MockSchemaRegistryClient``,
    encodes the sample records from ``data_gen`` and verifies the result
    with ``assertMessageIsSame``.
    """

    def setUp(self):
        # Each test gets its own mock registry and a serializer bound to it.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Verify the header of ``message`` and its decoded content.

        The first five bytes are unpacked as a big-endian signed byte,
        which must be 0, followed by an unsigned 32-bit integer, which must
        equal ``schema_id``. The complete message is then decoded with the
        serializer under test and compared with ``expected``.
        """
        self.assertTrue(message)
        # assertGreater reports the actual length when the check fails.
        self.assertGreater(len(message), 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        # Encode by explicit schema id, first for the basic schema and then
        # for the advanced one registered under a different subject.
        adv = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        # The two schemas must not end up sharing one id.
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(
                adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_for_topic(self):
        # The schema is registered under 'test-value' while encoding is
        # requested for topic 'test'; the message must carry that schema id.
        topic = 'test'
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_for_topic(topic, record)
            self.assertMessageIsSame(message, record, schema_id)

    def test_encode_record_with_schema(self):
        # Encoding with an explicit schema object must yield the id that the
        # same schema received when registered under 'test-value'.
        topic = 'test'
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for ``MessageSerializer`` using a mock registry."""

    def setUp(self):
        # A new mock registry and serializer are created for every test.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Assert that ``message`` has the expected header and payload.

        The leading five bytes are read as ``'>bI'``: a signed byte that
        must be 0 and an unsigned 32-bit integer that must equal
        ``schema_id``. Decoding the full message through the serializer
        must give back ``expected``.
        """
        self.assertTrue(message)
        # Unlike assertTrue(len(...) > 5), this shows the length on failure.
        self.assertGreater(len(message), 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def _assert_all_round_trip(self, records, schema_id, encode):
        """Encode each record with ``encode`` and verify the message."""
        for record in records:
            self.assertMessageIsSame(encode(record), record, schema_id)

    def test_encode_with_schema_id(self):
        # Encode by explicit schema id for two independently registered
        # schemas.
        adv = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)

        schema_id = self.client.register('test', basic)
        self._assert_all_round_trip(
            data_gen.BASIC_ITEMS, schema_id,
            lambda record: self.ms.encode_record_with_schema_id(
                schema_id, record))

        adv_schema_id = self.client.register('test_adv', adv)
        # Distinct schemas must be given distinct ids.
        self.assertNotEqual(adv_schema_id, schema_id)
        self._assert_all_round_trip(
            data_gen.ADVANCED_ITEMS, adv_schema_id,
            lambda record: self.ms.encode_record_with_schema_id(
                adv_schema_id, record))

    def test_encode_record_for_topic(self):
        # Schema is registered under 'test-value'; encoding is requested
        # for topic 'test'.
        topic = 'test'
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)

        self._assert_all_round_trip(
            data_gen.BASIC_ITEMS, schema_id,
            lambda record: self.ms.encode_record_for_topic(topic, record))

    def test_encode_record_with_schema(self):
        # Passing the schema object explicitly must produce the id it was
        # registered with under 'test-value'.
        topic = 'test'
        basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)

        self._assert_all_round_trip(
            data_gen.BASIC_ITEMS, schema_id,
            lambda record: self.ms.encode_record_with_schema(
                topic, basic, record))
コード例 #3
0
def test_select(cluster):
    # type: (ClickHouseCluster) -> None
    """Insert three registry-encoded Avro rows and read them back.

    Three records ``{'value': 0..2}`` are encoded with a
    ``MessageSerializer`` built on the cluster's schema registry client,
    concatenated, and inserted into a Memory table on the "dummy" instance
    using the ``AvroConfluent`` format. The table contents must then be
    exactly the rows 0, 1 and 2.
    """
    serializer = MessageSerializer(cluster.schema_registry_client)

    record_schema = avro.schema.make_avsc_object({
        'name': 'test_record',
        'type': 'record',
        'fields': [{'name': 'value', 'type': 'long'}],
    })

    # One encoded message per value, joined back to back into one payload.
    payload = b"".join(
        serializer.encode_record_with_schema(
            'test_subject', record_schema, {'value': value})
        for value in range(3)
    )

    node = cluster.instances["dummy"]  # type: ClickHouseInstance
    registry_url = "http://{}:{}".format(cluster.schema_registry_host,
                                         cluster.schema_registry_port)

    run_query(node, "create table avro_data(value Int64) engine = Memory()")
    # The setting passes the schema registry URL along with the insert.
    run_query(node, "insert into avro_data format AvroConfluent", payload,
              {'format_avro_schema_registry_url': registry_url})

    output = run_query(node, "select * from avro_data")
    rows = list(map(str.split, output.splitlines()))
    assert rows == [["0"], ["1"], ["2"]]
コード例 #4
0
def writeToavro(p, mls):
    """Encode one row of tweet fields as an Avro record.

    Original author's note: converts the payload into the avro format in
    preparation for loading into hbase.

    The schema is read from a fixed ``.avsc`` file, registered with the
    schema registry, looked up again, and used to encode a record built
    from ``mls``.

    Args:
        p: Not used by the visible code.
        mls: Indexable sequence of field values; indices 0 through 34 are
            read. The count-like fields are passed through
            ``convert_long``.

    Returns:
        True if an exception was caught and reported. On success the
        visible code falls through without an explicit return value.
    """
    # NOTE(review): `encoded` is neither returned nor used in the visible
    # code, and `topicname` is not defined in this function (presumably a
    # module-level global) -- confirm against the rest of the file.
    try:
        # Read the schema inside `with` so the file handle is closed
        # promptly rather than left for the garbage collector.
        with open('/root/quest/twitter_avro_schema.avsc') as schema_file:
            avro_schema = Util.parse_schema_from_string(schema_file.read())
        client = CachedSchemaRegistryClient(url='http://192.168.111.12:8081')
        schema_id = client.register('twitter_avro__schema_stream4',
                                    avro_schema)
        # The lookups below overwrite each other's results; they are kept
        # because each one can raise and thereby trigger the error path.
        avro_schema = client.get_by_id(schema_id)
        schema_id, avro_schema, schema_version = client.get_latest_schema(
            'twitter_avro__schema_stream4')
        schema_version = client.get_version('twitter_avro__schema_stream4',
                                            avro_schema)
        serializer = MessageSerializer(client)
        encoded = serializer.encode_record_with_schema(
            topicname, avro_schema, {
                "authid": mls[0],
                "screen_name": mls[1],
                "description": mls[2],
                "favourites_count": convert_long(mls[3]),
                "followers_count": convert_long(mls[4]),
                "friends_count": convert_long(mls[5]),
                "listed_count": convert_long(mls[6]),
                "location": mls[7],
                "id_str": mls[8],
                "time_zone": mls[9],
                "statuses_count": convert_long(mls[10]),
                "created_at": mls[11],
                "favorite_count": convert_long(mls[12]),
                "tid": mls[13],
                "in_reply_to_status_id_str": mls[14],
                "in_reply_to_user_id_str": mls[15],
                "lang": mls[16],
                "possibly_sensitive": mls[17],
                "retweet_count": convert_long(mls[18]),
                "text": mls[19],
                "entities_url": mls[20],
                "entities_expanded_url": mls[21],
                "entities_media_url": mls[22],
                "disgust": convert_long(mls[23]),
                "fear": convert_long(mls[24]),
                "sadness": convert_long(mls[25]),
                "surprise": convert_long(mls[26]),
                "trust": convert_long(mls[27]),
                "negative": convert_long(mls[28]),
                "positive": convert_long(mls[29]),
                "neutral": convert_long(mls[30]),
                "celebrities": mls[31],
                "events": mls[32],
                "brands": mls[33],
                "accessories": mls[34],
            })
    except Exception as e:
        # `except ... as` and single-argument print() behave the same on
        # Python 2.6+ and are also valid Python 3 syntax.
        logging.debug(
            'There was an error in the generation of the avro file. The error is: %s',
            e)
        # Two spaces before %s reproduce the output of the former
        # `print 'Error in avro generation : ', e` statement.
        print('Error in avro generation :  %s' % (e,))
        print(mls)
        twitter_utils.sendErrorMail(
            'There was an error in the generation of the avro file. The error is %s'
            % e)
        return True