Example #1
0
def loadavsc(avscid):
  """Load the Avro schema ``avscid`` from the schema registry into ``avscmap``.

  The registry is queried through a ``CachedSchemaRegistryClient`` built from
  ``options.urlscreg`` and ``options.calocation``. On success the JSON-decoded
  schema is stored under ``avscmap[avscid]["avsc"]``.

  Returns:
    The schema object returned by ``client.get_by_id``. ``None`` is returned
    when creating the client or the lookup raises. If the schema cannot be
    decoded as JSON it is returned undecoded. In both failure cases
    ``avscmap`` is left untouched.
  """
  global avscmap
  global options
  serializelog.debug("In loadavsc with avscid: %s" % avscid)
  serializelog.debug("options.urlscreg: %s options.calocation: %s" % (options.urlscreg, options.calocation))

  try:
    serializelog.debug("querying screg with avscid: %s" % (avscid))
    client = CachedSchemaRegistryClient({'url':options.urlscreg, 'ssl.ca.location':options.calocation})
    avsc = client.get_by_id(avscid)
  except Exception as e:
    serializelog.info("ERROR: load avro schema from schema-registry-server is failed on CachedSchemaRegistryClient on using method get_by_id()")
    serializelog.info("ERROR: %s" % (e))
    # Nothing to decode; falling through would only fail again below.
    return None

  try:
    avsc_dict = json.loads(str(avsc))
  except Exception as e:
    serializelog.info("ERROR: json.loads of the avsc_str is faild to produce a dict")
    serializelog.info("ERROR: %s" % (e))
    # avsc_dict is unbound here; stop instead of raising NameError below.
    return avsc

  serializelog.info("SCHEMA_OF_ID(%s): %s" % (avscid, avsc_dict["name"]))

  if avscid in avscmap:
    serializelog.debug("Update avscmap the existing record avscid (%s) with avroschema" % avscid)
    avscmap[avscid].update({"avsc": avsc_dict})
  else:
    serializelog.debug("Update avscmap with new record avscid (%s) with avroschema" % avscid)
    avscmap.update({avscid:{"avsc": avsc_dict}})

  return avsc
Example #2
0
    def test_value_subject_name_strategies(self):
        # Each supported 'value.subject.name.strategy' name must resolve to the
        # matching strategy function on the client.
        def resolved_strategy(strategy_name):
            client = CachedSchemaRegistryClient({
                'url': 'https://*****:*****@127.0.0.1:65534',
                'value.subject.name.strategy': strategy_name,
            })
            return client.value_subject_name_strategy_func

        self.assertEqual(topic_name_strategy,
                         resolved_strategy("TopicNameStrategy"))
        self.assertEqual(record_name_strategy,
                         resolved_strategy("RecordNameStrategy"))
        self.assertEqual(topic_record_name_strategy,
                         resolved_strategy("TopicRecordNameStrategy"))
Example #3
0
    def loadavsc(self, avscid):
        """Fetch the Avro schema ``avscid`` from the schema registry and cache it.

        The registry client is built from ``lib_pmgrpcd.OPTIONS.urlscreg`` and
        ``lib_pmgrpcd.OPTIONS.calocation``. On success the JSON-decoded schema
        is stored under ``self.avscmap[avscid]["avsc"]``.

        Returns:
            The schema object returned by ``client.get_by_id``. ``None`` is
            returned when the client cannot be created or the lookup raises.
            If the schema cannot be decoded as JSON it is returned undecoded
            and ``self.avscmap`` is not updated.
        """
        self.__logger.debug("In loadavsc with avscid: %s", avscid)
        avsc = None
        self.__logger.debug(
            "lib_pmgrpcd.OPTIONS.urlscreg: %s lib_pmgrpcd.OPTIONS.calocation: %s",
            lib_pmgrpcd.OPTIONS.urlscreg,
            lib_pmgrpcd.OPTIONS.calocation,
        )

        try:
            self.__logger.debug(
                "Instancing client (CachedSchemaRegistryClient) with avscid:%s url:%s ssl.ca.location:%s",
                avscid,
                lib_pmgrpcd.OPTIONS.urlscreg,
                lib_pmgrpcd.OPTIONS.calocation,
            )
            client = CachedSchemaRegistryClient(
                url=lib_pmgrpcd.OPTIONS.urlscreg,
                ca_location=lib_pmgrpcd.OPTIONS.calocation,
            )
        except Exception as e:
            # This step only builds the client; get_by_id() has not run yet,
            # so the message names the step that actually failed.
            self.__logger.info(
                "ERROR: instancing CachedSchemaRegistryClient for the schema-registry-server is failed"
            )
            self.__logger.info("ERROR: %s", e)
            return avsc

        try:
            avsc = client.get_by_id(avscid)
        except Exception as e:
            self.__logger.info(
                "ERROR: load avro schema from schema-registry-server is failed on CachedSchemaRegistryClient on using method get_by_id()"
            )
            self.__logger.info("ERROR: %s", e)
            return avsc

        try:
            avsc_dict = json.loads(str(avsc))
        except Exception as e:
            self.__logger.info(
                "ERROR: json.loads of the avsc_str is faild to produce a dict")
            self.__logger.info("ERROR: %s", e)
            return avsc

        self.__logger.info("SCHEMA_OF_ID(%s): %s", avscid, avsc_dict["name"])

        if avscid in self.avscmap:
            self.__logger.debug(
                "Update  self.avscmap the existing record avscid (%s) with avroschema",
                avscid)
            self.avscmap[avscid].update({"avsc": avsc_dict})
        else:
            self.__logger.debug(
                "Update  self.avscmap with new record avscid (%s) with avroschema",
                avscid)
            self.avscmap.update({avscid: {"avsc": avsc_dict}})

        return avsc
def produce(config, topic, input_messages):
    """
        Serialize ``input_messages`` with the latest ``<topic>-value`` schema
        and send them to Kafka, then exit the process.

    Parameters
    ----------
        config: dict
            the config values handed to ``producer_config`` to obtain the
            bootstrap-server and schema-registry settings
        topic: str
            topic the input messages are published to
        input_messages: dict
            key/value input messages; only entries whose key passes
            ``validate_uuid4`` are sent, the others are logged and skipped

    Raises
    ------
        ValueError
            if ``topic`` is None or ``input_messages`` is empty
        RuntimeError
            if the broker reports no topics

     """
    if topic is None:
        logger.debug('Required topic field must be set')
        raise ValueError('Required topic field must be set')

    if not input_messages:
        logger.debug('Required data field must not be empty.')
        raise ValueError('Required data field must not be empty.')

    bootstrap_servers, schema_registry = producer_config(config)

    producer = Producer(bootstrap_servers)
    admin_client = AdminClient(bootstrap_servers)
    topics = admin_client.list_topics().topics
    #Just to show what's available
    print(topics)

    if not topics:
        print('Not Topics')
        raise RuntimeError()

    sr = CachedSchemaRegistryClient(schema_registry)
    ser = MessageSerializer(sr)
    # get schema (the schema id and version are not needed here)
    _schema_id, schema, _version = sr.get_latest_schema(topic + "-value")
    if schema:
        print('In If Schema')
        for key, value in input_messages.items():
            if validate_uuid4(key):
                print('In validate in For loop')
                serializedMessage = ser.encode_record_with_schema(
                    topic, schema, value)
                producer.produce(topic=topic,
                                 key=key,
                                 value=serializedMessage,
                                 callback=acked)
                # Serve delivery callbacks without flushing per message,
                # which would limit throughput to the broker round trip time.
                producer.poll(1)
            else:
                print('In Else of For Loop')
                # The key must go through a placeholder; a bare extra
                # argument makes logging report a formatting error.
                logger.error('Invalid UUID String: %s', key)

        # The process exits right below: wait for queued messages first so
        # they are not dropped on shutdown.
        producer.flush()
    else:
        print('Schema not found for topic name: ', topic)
        print('In Else Schema')
    # NOTE(review): this exits with status 1 on every path, including a
    # successful send -- confirm whether it was meant for the else branch only.
    sys.exit(1)
Example #5
0
    def initSchemaRegistry(self):
        """Create ``self.RegistryClient`` from ``self.Conf['schema.registry']``.

        If that configuration entry cannot be read the method returns without
        creating a client.
        """
        try:
            registry_url = self.Conf['schema.registry']
        except Exception:
            # Best effort: no registry configured means no client. Unlike a
            # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
            return

        self.RegistryClient = CachedSchemaRegistryClient(url=registry_url)
        debug(level=1, RegistryClient=self.RegistryClient)
 def init_consumer(self, schema_registry_url, topics):
     """Create the consumer and the registry client, then subscribe to ``topics``.

     The consumer is built from ``self.consumer_conf``; the registry client
     is stored on ``self.register_client`` and the topics on ``self.topics``.
     """
     logger.info("Initializing avro consumer")
     kafka_consumer = Consumer(self.consumer_conf)
     self.consumer = kafka_consumer

     logger.info(f"Schema registry url: {schema_registry_url}")
     registry_client = CachedSchemaRegistryClient(url=schema_registry_url)
     self.register_client = registry_client

     logger.info(f"Subscribing to topics: {topics}")
     self.topics = topics
     kafka_consumer.subscribe(self.topics)
Example #7
0
 def __init__(self, schema_registry_url):
     """Private implementation class for Avro IO using the registry.

     Builds a registry client for ``schema_registry_url``, fetches the schema
     with id ``config.SCHEMA_ID`` and creates a ``MessageSerializer`` on that
     client.

     Raises:
         ValueError: if any of those steps raises; the original error is
             chained as ``__cause__``.
     """
     log.info(
         f"Using registry with schema_url/id {schema_registry_url}/{config.SCHEMA_ID}"
     )
     try:
         self.client = CachedSchemaRegistryClient(url=schema_registry_url)
         self.schema = self.client.get_by_id(config.SCHEMA_ID)
         self.serializer = MessageSerializer(self.client)
     except Exception as exc:
         # ``except Exception`` keeps KeyboardInterrupt/SystemExit from being
         # converted into ValueError; ``from exc`` preserves the root cause.
         raise ValueError("Client id or schema id not found") from exc
 def __init__(self, app):
     """Register this instance, set up its clients and start the reader threads.

     The schema-registry client uses ``app.config['KAFKA_SCHEMA_REGISTRY_URL']``;
     ``self.client`` comes from ``Config.getInstance().getESClient()``. One
     thread is started for each of the three ``read*Data`` methods.
     """
     print("2")
     LocationKafkaListerner.__instance = self
     self.app = app
     self.config = Config.getInstance()
     registry_url = app.config['KAFKA_SCHEMA_REGISTRY_URL']
     self.register_client = CachedSchemaRegistryClient(url=registry_url)
     print("3")
     self.client = self.config.getESClient()
     # Resolve and start each reader in turn, in the same order as before.
     for reader_name in ("readJobsData", "readMappingsData",
                         "readSubmissionsData"):
         worker = threading.Thread(target=getattr(self, reader_name))
         worker.start()
    def __init__(self, schema_subject, schema_registry_url):
        """Build a serializer for the schema registered under ``schema_subject``.

        A registry client is created for ``schema_registry_url``, then
        ``self._load_schema()`` is called and a ``SchemalessAvroRecordWriter``
        is built from ``self.avro_schema`` (presumably set by
        ``_load_schema`` -- confirm there).

        Note this constructor is not exception safe.
        """
        self.schema_subject = schema_subject
        self.schema_registry_url = schema_registry_url

        registry_client = CachedSchemaRegistryClient(
            url=self.schema_registry_url)
        self.schema_registry_client = registry_client

        self._load_schema()

        record_writer = SchemalessAvroRecordWriter(self.avro_schema)
        self.writer = record_writer
Example #10
0
    def init_consumer(self):
        """Create the catalog consumer, subscribe it and return it.

        Broker and registry addresses are read from ``self.config``; the group
        id and topic name are derived from ``self.catalogId``. The registry
        client is stored on ``self.register_client``.
        """
        settings = self.config
        broker_address = settings['bootstrap-server']
        registry_url = settings['schema-registery-url']

        # Kafka broker settings
        consumer_conf = {
            'bootstrap.servers': broker_address,
            'group.id': 'catalog-export-%s' % self.catalogId,
            'auto.offset.reset': 'earliest',
        }
        consumer = Consumer(consumer_conf)

        # Schema registry client
        self.register_client = CachedSchemaRegistryClient(url=registry_url)

        catalog_topic = 'catserver-%s-catalog' % self.catalogId
        consumer.subscribe([catalog_topic], on_assign=self.my_on_assign)
        return consumer
Example #11
0
    def __init__(self,
                 config,
                 default_key_schema=None,
                 default_value_schema=None,
                 schema_registry=None):
        """Split ``config`` into registry and producer settings and set up both.

        Keys starting with ``schema.registry`` (with the ``schema.registry.``
        prefix removed) configure the registry client; all other keys are
        passed to the parent producer. With ``SASL_INHERIT`` the SASL
        settings of ``config`` are copied into the registry settings.

        Raises:
            ValueError: if ``schema_registry`` is given together with a
                ``schema.registry.url`` entry.
        """
        sr_conf = {}
        ap_conf = {}
        for option, setting in config.items():
            if option.startswith("schema.registry"):
                sr_conf[option.replace("schema.registry.", "")] = setting
            else:
                ap_conf[option] = setting

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            for sasl_option in ('sasl.mechanisms', 'sasl.username',
                                'sasl.password'):
                sr_conf[sasl_option] = config.get(sasl_option, '')

        if schema_registry is not None:
            if sr_conf.get("url", None) is not None:
                raise ValueError(
                    "Cannot pass schema_registry along with schema.registry.url config"
                )
        else:
            schema_registry = CachedSchemaRegistryClient(sr_conf)

        super(AvroProducer, self).__init__(ap_conf)
        self._serializer = MessageSerializer(schema_registry)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema
Example #12
0
    def __init__(self,
                 config,
                 default_key_schema=None,
                 default_value_schema=None,
                 schema_registry=None,
                 subject_name_strategy=SubjectNameStrategy.RecordNameStrategy
                 ):
        """Split ``config`` into registry and producer settings and set up both.

        Keys starting with ``schema.registry`` (with the ``schema.registry.``
        prefix removed) configure the registry client; all other keys are
        passed to the parent producer. ``subject_name_strategy`` is handed to
        the ``MessageSerializer``.

        Raises:
            ValueError: if ``schema_registry`` is given together with a
                ``schema.registry.url`` entry.
        """
        sr_conf = {}
        ap_conf = {}
        for option, setting in config.items():
            if option.startswith("schema.registry"):
                sr_conf[option.replace("schema.registry.", "")] = setting
            else:
                ap_conf[option] = setting

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            # Fallback to plural 'mechanisms' for backward compatibility
            legacy_mechanism = config.get('sasl.mechanisms', '')
            sr_conf.update({
                'sasl.mechanism': config.get('sasl.mechanism', legacy_mechanism),
                'sasl.username': config.get('sasl.username', ''),
                'sasl.password': config.get('sasl.password', ''),
                'auto.register.schemas': config.get('auto.register.schemas', True),
            })

        if schema_registry is not None:
            if sr_conf.get("url", None) is not None:
                raise ValueError("Cannot pass schema_registry along with schema.registry.url config")
        else:
            schema_registry = CachedSchemaRegistryClient(sr_conf)

        super(AvroProducer, self).__init__(ap_conf)
        self._serializer = MessageSerializer(
            schema_registry, subject_name_strategy=subject_name_strategy)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema
Example #13
0
    def __init__(self,
                 config,
                 default_key_schema=None,
                 default_value_schema=None,
                 schema_registry=None):
        """Set up the producer and its Avro serializer.

        The ``schema.registry.*`` entries are removed from ``config`` (the
        caller's mapping is modified) before it is passed to the parent
        producer. A registry client is built from them unless
        ``schema_registry`` is supplied.

        Raises:
            ValueError: if neither ``schema_registry`` nor
                ``schema.registry.url`` is given, or if both are.
        """
        registry_url = config.pop("schema.registry.url", None)
        ssl_options = {
            "ca_location":
            config.pop("schema.registry.ssl.ca.location", None),
            "cert_location":
            config.pop("schema.registry.ssl.certificate.location", None),
            "key_location":
            config.pop("schema.registry.ssl.key.location", None),
        }

        if schema_registry is not None:
            if registry_url is not None:
                raise ValueError(
                    "Cannot pass schema_registry along with schema.registry.url config"
                )
        elif registry_url is None:
            raise ValueError("Missing parameter: schema.registry.url")
        else:
            schema_registry = CachedSchemaRegistryClient(url=registry_url,
                                                         **ssl_options)

        super(AvroProducer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema
 def test_basic_auth_url(self):
     # With only a URL configured, the session's auth pair is expected to be
     # ('user_url', 'secret_url').
     client_conf = {'url': 'https://*****:*****@127.0.0.1:65534'}
     self.client = CachedSchemaRegistryClient(client_conf)

     expected_auth = ('user_url', 'secret_url')
     self.assertTupleEqual(expected_auth, self.client._session.auth)
 def test_basic_auth_invalid(self):
     # The credentials source 'VAULT' must be rejected with ValueError.
     client_conf = {
         'url': 'https://*****:*****@127.0.0.1:65534',
         'basic.auth.credentials.source': 'VAULT',
     }
     with self.assertRaises(ValueError):
         self.client = CachedSchemaRegistryClient(client_conf)
Example #16
0
 def test_invalid_value_subject_name_strategy(self):
     # An unknown value subject name strategy must be rejected with ValueError.
     client_conf = {
         'url': 'https://*****:*****@127.0.0.1:65534',
         'value.subject.name.strategy': "InvalidNameStrategy",
     }
     with self.assertRaises(ValueError):
         CachedSchemaRegistryClient(client_conf)
 def setUp(self):
     # Make RecordSchema and PrimitiveSchema hashable
     for schema_cls in (schema.RecordSchema, schema.PrimitiveSchema):
         schema_cls.__hash__ = self.hash_func

     # Start the mock registry on port 9002 and point a client at it.
     mock_server = mock_registry.ServerThread(9002)
     self.server = mock_server
     mock_server.start()
     self.client = CachedSchemaRegistryClient('http://127.0.0.1:9002')
     # Presumably gives the server thread time to start listening -- confirm.
     time.sleep(1)
Example #18
0
    def test_default_value_subject_name_strategy(self):
        # Without an explicit strategy the client is expected to use
        # topic_name_strategy for value subjects.
        client_conf = {'url': 'https://*****:*****@127.0.0.1:65534'}
        client = CachedSchemaRegistryClient(client_conf)

        self.assertEqual(topic_name_strategy,
                         client.value_subject_name_strategy_func)
Example #19
0
    def __init__(self, config):
        """Set up the consumer and an Avro serializer bound to the registry.

        ``schema.registry.url`` is removed from ``config`` (the caller's
        mapping is modified) before it is passed to the parent consumer.

        Raises:
            ValueError: if ``config`` has no ``schema.registry.url`` entry.
        """
        if 'schema.registry.url' not in config:
            raise ValueError("Missing parameter: schema.registry.url")
        registry_url = config.pop("schema.registry.url")

        super(AvroConsumer, self).__init__(config)
        registry_client = CachedSchemaRegistryClient(url=registry_url)
        self._serializer = MessageSerializer(registry_client)
 def test_init_with_dict(self):
     # A dict configuration with SSL cert/key paths must keep the URL as given.
     registry_url = 'https://127.0.0.1:65534'
     client_conf = {
         'url': registry_url,
         'ssl.certificate.location': '/path/to/cert',
         'ssl.key.location': '/path/to/key',
     }
     self.client = CachedSchemaRegistryClient(client_conf)
     self.assertEqual('https://127.0.0.1:65534', self.client.url)
Example #21
0
    def __init__(self, config, default_key_schema=None,
                 default_value_schema=None):
        """Set up the producer and an Avro serializer bound to the registry.

        ``schema.registry.url`` is removed from ``config`` (the caller's
        mapping is modified) before it is passed to the parent producer.

        Raises:
            ValueError: if ``config`` has no ``schema.registry.url`` entry.
        """
        if 'schema.registry.url' not in config:
            raise ValueError("Missing parameter: schema.registry.url")
        registry_url = config.pop("schema.registry.url")

        super(AvroProducer, self).__init__(config)
        registry_client = CachedSchemaRegistryClient(url=registry_url)
        self._serializer = MessageSerializer(registry_client)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema
Example #22
0
    def read_from_offset(self, offset=0, lang='json', schema=None):
        '''
        Read messages from partition 0 of ``self.topic`` starting at ``offset``.

        With ``lang == 'avro'`` each message value is decoded through the
        schema registry at ``self.schema_registry`` (port 8081) and rendered
        as an indented JSON string; otherwise the raw message value is used.

        offset: position to start reading from; converted with ``int()``.
        lang:   ``'avro'`` to decode values, anything else for raw values.
        schema: unused by this method.

        Returns the last message read, or ``None`` if no message arrived.
        '''
        log.debug("[KafkaDriver][read_from_offset] lang: " + str(lang))
        log.debug("[KafkaDriver][read_from_offset] offset: " + str(offset))

        def outputJSON(obj):
            '''
            Default JSON serializer: datetimes become an integer built from
            the "%s%f" strftime output with its last three digits dropped.
            '''
            if isinstance(obj, datetime.datetime):
                # NOTE(review): "%s" is not a standard strftime directive and
                # is platform dependent -- confirm the target platforms.
                return int(obj.strftime("%s%f")[:-3])
            return obj

        ret = None
        log.debug("[KafkaDriver][read_from_offset] read start: " + str(self.server))
        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        try:
            partition = TopicPartition(self.topic, 0)
            consumer.assign([partition])
            consumer.seek_to_end(partition)
            # Seek with the integer form: the offset may arrive as a string.
            start = int(offset)
            consumer.seek(partition, start)

            serializer_ready = False
            for msg in consumer:
                if lang == 'avro':
                    if not serializer_ready:
                        # Build the registry client and serializer once, on
                        # the first message, instead of once per message.
                        schema_registry = CachedSchemaRegistryClient(url='http://' + self.schema_registry + ':8081')
                        self._serializer = MessageSerializer(schema_registry)
                        serializer_ready = True
                    message = self._serializer.decode_message(msg.value)
                    message = json.dumps(message, indent=4, sort_keys=True, default=outputJSON)
                    ret = message
                else:
                    message = msg.value
                    ret = msg.value
                log.debug("[KafkaDriver][read_from_offset] msg: " + str(message) + " msg.offset: " + str(msg.offset))
        finally:
            # Release the consumer even when decoding or seeking raises.
            consumer.close()
        log.debug("[KafkaDriver][read_from_offset] read end")
        return ret
 def test_basic_auth_userinfo(self):
     # With the 'user_info' credentials source the session's auth pair must
     # come from 'basic.auth.user.info'.
     client_conf = {
         'url': 'https://*****:*****@127.0.0.1:65534',
         'basic.auth.credentials.source': 'user_info',
         'basic.auth.user.info': 'user_userinfo:secret_userinfo',
     }
     self.client = CachedSchemaRegistryClient(client_conf)

     expected_auth = ('user_userinfo', 'secret_userinfo')
     self.assertTupleEqual(expected_auth, self.client._session.auth)
 def __init__(self,
              producer,
              schema_registry_url,
              default_key_schema=None,
              default_value_schema=None
              ):  # real signature unknown; restored from __doc__
     """Wrap ``producer`` with an Avro serializer bound to the registry.

     The serializer uses a registry client created for
     ``schema_registry_url``; the default schemas are stored on
     ``self.key_schema`` and ``self.value_schema``.
     """
     self._producer = producer
     registry_client = CachedSchemaRegistryClient(url=schema_registry_url)
     self._serializer = MessageSerializer(registry_client)
     self.key_schema = default_key_schema
     self.value_schema = default_value_schema
Example #25
0
def consume(config, topic, handler):
    """
    Starts a consumer and calls the given handler for each consumed message.
    Assumes that keys are serialized as strings and values are serialized
    as Avro objects with their schemas stored in a Confluent Schema Registry.

    ``config`` entries whose key starts with ``schema.registry`` configure the
    registry client (with the ``schema.registry.`` prefix removed); all others
    configure the consumer. Entries whose value is ``None`` are skipped and
    every other value is stripped of surrounding whitespace. Defaults of
    ``auto.offset.reset='earliest'`` and ``group.id='sme_test'`` are applied
    when missing.

    The loop stops at the first failure while decoding a message or inside
    ``handler``; the consumer is always closed on the way out.
    """
    c_conf = {}
    sr_conf = {}
    for key, value in config.items():
        if value is None:
            # Unset options are skipped for both clients; calling strip() on
            # None would raise AttributeError.
            continue
        if key.startswith("schema.registry"):
            sr_conf[key.replace("schema.registry.", "")] = value.strip()
        else:
            c_conf[key] = value.strip()

    if "auto.offset.reset" in c_conf:
        print("offset provided")
    else:
        c_conf['auto.offset.reset'] = 'earliest'

    if "group.id" in c_conf:
        print("group id provided")
    else:
        c_conf['group.id'] = 'sme_test'

    c = Consumer(c_conf)
    try:
        c.subscribe([topic])

        sr = CachedSchemaRegistryClient(sr_conf)
        ser = MessageSerializer(sr)

        while True:
            try:
                msg = c.poll(10)
                if msg is None:
                    print('No Messages')
                    continue
                if msg.error():
                    log.error("Consumer error: {}".format(msg.error()))
                    continue
                key = msg.key().decode('utf-8')
                value = ser.decode_message(msg.value(), is_key=False)
            except Exception as e:
                log.error("Message consumption failed: {}".format(e))
                break
            try:
                handler(key, value)
            except Exception as e:
                log.error("Message handler failed: {}".format(e))
                break
    finally:
        # Close the consumer even if registry setup or an interrupt ends
        # the loop.
        c.close()
Example #26
0
 def test_basic_auth_sasl_inherit_invalid(self):
     # SASL_INHERIT with the GSSAPI mechanism must be rejected with a
     # ValueError whose message names the upper-cased mechanism.
     client_conf = {
         'url': 'https://*****:*****@127.0.0.1:65534',
         'basic.auth.credentials.source': 'SASL_INHERIT',
         'sasl.mechanism': 'gssapi',  # also test the .upper()
     }
     with self.assertRaises(ValueError) as raised:
         self.client = CachedSchemaRegistryClient(client_conf)

     expected_message = "SASL_INHERIT does not support SASL mechanism GSSAPI"
     self.assertEqual(str(raised.exception), expected_message)
Example #27
0
    def __init__(self, config, schema_registry=None):
        """Set up the consumer and its Avro serializer.

        ``schema.registry.url`` is removed from ``config`` (the caller's
        mapping is modified) before it is passed to the parent consumer. A
        registry client is built from it unless ``schema_registry`` is given.

        Raises:
            ValueError: if neither ``schema_registry`` nor
                ``schema.registry.url`` is given, or if both are.
        """
        registry_url = config.pop("schema.registry.url", None)

        if schema_registry is not None:
            if registry_url is not None:
                raise ValueError(
                    "Cannot pass schema_registry along with schema.registry.url config"
                )
        elif registry_url is None:
            raise ValueError("Missing parameter: schema.registry.url")
        else:
            schema_registry = CachedSchemaRegistryClient(url=registry_url)

        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)
 def test_basic_auth_sasl_inherit(self):
     # With SASL_INHERIT and the PLAIN mechanism the session's auth pair is
     # expected to be ('user_sasl', 'secret_sasl').
     client_conf = {
         'url': 'https://*****:*****@127.0.0.1:65534',
         'basic.auth.credentials.source': 'SASL_INHERIT',
         'sasl.mechanism': 'PLAIN',
         'sasl.username': '******',
         'sasl.password': '******',
     }
     self.client = CachedSchemaRegistryClient(client_conf)

     expected_auth = ('user_sasl', 'secret_sasl')
     self.assertTupleEqual(expected_auth, self.client._session.auth)
 def test_invalid_conf(self):
     # This configuration, which includes the 'invalid.conf' and
     # 'invalid.conf2' keys, must be rejected with ValueError.
     client_conf = {
         'url': 'https://*****:*****@127.0.0.1:65534',
         'basic.auth.credentials.source': 'SASL_INHERIT',
         'sasl.username': '******',
         'sasl.password': '******',
         'invalid.conf': 1,
         'invalid.conf2': 2,
     }
     with self.assertRaises(ValueError):
         self.client = CachedSchemaRegistryClient(client_conf)
Example #30
0
class _AvroIORegistry:
    """Private implementation class for Avro IO using the registry."""

    def __init__(self, schema_registry_url):
        """Connect to the registry and prepare the serializer.

        Builds a registry client for ``schema_registry_url``, fetches the
        schema with id ``config.SCHEMA_ID`` and creates a
        ``MessageSerializer`` on that client.

        Raises:
            ValueError: if any of those steps raises; the original error is
                chained as ``__cause__``.
        """
        log.info(
            f"Using registry with schema_url/id {schema_registry_url}/{config.SCHEMA_ID}"
        )
        try:
            self.client = CachedSchemaRegistryClient(url=schema_registry_url)
            self.schema = self.client.get_by_id(config.SCHEMA_ID)
            self.serializer = MessageSerializer(self.client)
        except Exception as exc:
            # ``except Exception`` keeps KeyboardInterrupt/SystemExit from
            # being converted into ValueError; ``from exc`` keeps the cause.
            raise ValueError("Client id or schema id not found") from exc

    def decode(self, bytes):
        """Decode ``bytes`` with the registry-backed serializer."""
        return self.serializer.decode_message(bytes)

    def encode(self, record):
        """Encode ``record`` using the schema id ``config.SCHEMA_ID``."""
        return self.serializer.encode_record_with_schema_id(
            config.SCHEMA_ID, record)