def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
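
The tests above pass an error_cb that is not shown in this excerpt; a minimal sketch of such a callback, assuming only that client-level errors (e.g. broker connection failures) should be surfaced, might look like this:

def error_cb(err):
    # Called by librdkafka for client-level errors; with no broker configured,
    # transport / all-brokers-down errors are the expected kind here.
    print('error_cb', err)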
Example 2
class KafkaPublisher(object):
    def __init__(self, connection, asynchronous=True):
        from confluent_kafka import Producer

        self.producer = Producer(connection or {})
        self.asynchronous = asynchronous

    def publish(self, channel, value, key=None):
        self.producer.produce(topic=channel, value=value, key=key)
        if not self.asynchronous:
            self.producer.flush()
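
A hypothetical usage of the KafkaPublisher wrapper above; the broker address, topic, and payload are illustrative:

publisher = KafkaPublisher({'bootstrap.servers': 'localhost:9092'}, asynchronous=False)
publisher.publish('events', b'payload', key=b'event-1')  # flush() runs because asynchronous=False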
def test_produce_timestamp():
    """ Test produce() with timestamp arg """
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    # Requires librdkafka >=v0.9.4

    try:
        p.produce('mytopic', timestamp=1234567)
    except NotImplementedError:
        # Should only fail on non-supporting librdkafka
        if libversion()[1] >= 0x00090400:
            raise

    p.flush()
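
The version check above relies on libversion() returning a (version_string, version_int) tuple, where the integer is hex-encoded as 0xMMmmrrpp; a short sketch of reading it:

version_str, version_int = libversion()
# 0x00090400 encodes librdkafka v0.9.4, the first release with timestamp support.
supports_timestamps = version_int >= 0x00090400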
Example 4
def producer(args, sniff_timeout_ms=500, sniff_promisc=True):
    """ Captures packets from a network interface and sends them to a Kafka topic. """

    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    global producer_args
    producer_args = args

    # connect to kafka
    logging.info("Connecting to Kafka; %s", args.kafka_configs)
    kafka_producer = Producer(args.kafka_configs)

    # initialize packet capture
    logging.info("Starting packet capture")
    capture = pcapy.open_live(args.interface, args.snaplen, sniff_promisc, sniff_timeout_ms)
    pkts_in = 0

    try:
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):

            # capture a packet
            (pkt_hdr, pkt_raw) = capture.next()
            if pkt_hdr is not None:
                logging.debug("Packet received: pkts_in=%d, pkt_len=%s", pkts_in, pkt_hdr.getlen())
                pkts_in += 1
                pkt_ts = timestamp(pkt_hdr)
                kafka_producer.produce(args.kafka_topic, key=pack_ts(pkt_ts), value=pkt_raw, callback=delivery_callback)

                # pretty print, if needed
                if args.pretty_print > 0 and pkts_in % args.pretty_print == 0:
                    print('Packet received[%s]' % pkts_in)

            # serve the callback queue
            kafka_producer.poll(0)

    finally:
        # flush all messages
        logging.info("Waiting for '%d' message(s) to flush", len(kafka_producer))
        kafka_producer.flush()

        # pkts_out may not be initialized if the callback was never executed
        pkts_out = 0
        if hasattr(delivery_callback, "pkts_out"):
            pkts_out = delivery_callback.pkts_out

        logging.info("'%d' packet(s) in, '%d' packet(s) out", pkts_in, pkts_out)
Example 5
class KafkaWorkflowCommunicationSender(object):
    _requires = ['confluent-kafka']

    def __init__(self, message_converter=ProtobufWorkflowCommunicationConverter):
        kafka_config = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.topic = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_TOPIC
        self.message_converter = message_converter

    def shutdown(self):
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def pause_workflow(self, workflow_execution_id):
        """Pauses a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Pausing workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_pause_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def abort_workflow(self, workflow_execution_id):
        """Aborts a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Aborting running workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_abort_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def send_exit_to_workers(self):
        """Sends the exit message over the communication sockets, otherwise worker receiver threads will hang"""
        message = self.message_converter.create_worker_exit_message()
        self._send_workflow_communication_message(message, None)

    def _send_workflow_communication_message(self, message, workflow_id):
        self._send_message(message, self.topic, workflow_id)

    def _send_message(self, message, topic, key):
        self.producer.produce(topic, message, key=key, callback=self._delivery_callback)
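
A hypothetical usage of the sender above, assuming the walkoff Kafka configuration has already been loaded; the execution id is generated here purely for illustration:

import uuid

sender = KafkaWorkflowCommunicationSender()
sender.pause_workflow(str(uuid.uuid4()))  # execution id passed as a string for illustration
sender.shutdown()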
def test_produce_headers():
    """ Test produce() with timestamp arg """
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    binval = pack('hhl', 1, 2, 3)

    headers_to_test = [
        [('headerkey', 'headervalue')],
        [('dupkey', 'dupvalue'), ('empty', ''), ('dupkey', 'dupvalue')],
        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
        [('key_with_null_value', None)],
        [('binaryval', binval)],
        [('alreadyutf8', u'Småland'.encode('utf-8'))],
        [('isunicode', 'Jämtland')],

        {'headerkey': 'headervalue'},
        {'dupkey': 'dupvalue', 'empty': '', 'dupkey': 'dupvalue'},  # noqa: F601
        {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'},  # noqa: F601
        {'key_with_null_value': None},
        {'binaryval': binval},
        {'alreadyutf8': u'Småland'.encode('utf-8')},
        {'isunicode': 'Jämtland'}
        ]

    for headers in headers_to_test:
        print('headers', type(headers), headers)
        p.produce('mytopic', value='somedata', key='a key', headers=headers)
        p.produce('mytopic', value='somedata', headers=headers)

    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=('a', 'b'))

    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=[('malformed_header')])

    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', headers={'anint': 1234})

    p.flush()
def test_dr_msg_errstr():
    """
    Test that the error string for failed messages works (issue #129).
    The underlying problem is that librdkafka reuses the message payload
    for error value on Consumer messages, but on Producer messages the
    payload is the original payload and no rich error string exists.
    """
    p = Producer({"message.timeout.ms": 10})

    def handle_dr(err, msg):
        # The message payload must not affect the error string.
        assert err is not None
        assert err.code() == KafkaError._MSG_TIMED_OUT
        assert "Message timed out" in err.str()

    # Unicode safe string
    p.produce('mytopic', "This is the message payload", on_delivery=handle_dr)

    # Invalid unicode sequence
    p.produce('mytopic', "\xc2\xc2", on_delivery=handle_dr)

    p.flush()
Example 8
class Publisher():

    def __init__(self, config={'bootstrap.servers': 'pulsing.jhk.org:9092', 'retries': 3, 'api.version.request': True}):
        super().__init__()
        self.__producer = Producer(config)
        self.logger = logging.getLogger(__name__)

    def publish(self, topic, data):
        self.logger.debug('publish %s - %s', topic, data)
        self.__producer.produce(topic, data.encode('utf-8'))
        self.__producer.flush()

    @property
    def producer(self):
        return self.__producer

    def __eq__(self, other):
        return self.__producer == other.__producer

    def __str__(self):
        return self.__producer.__str__()

    def __hash__(self):
        return self.__producer.__hash__()
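
A hypothetical usage of the Publisher above; the topic name and payload are illustrative:

publisher = Publisher()  # defaults to the broker configured in __init__
publisher.publish('demo-topic', '{"hello": "world"}')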
Example 9
def test_consumer_rebalance_from_uncommitted_offset(requires_kafka):
    consumer_group = f"consumer-{uuid.uuid1().hex}"
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic, f"{i}".encode(), partition=i % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        for (topic, partition), offset in {
            (message.topic(), message.partition()): message.offset()
            for message in messages_delivered[topic]
        }.items():
            producer.produce(
                commit_log_topic,
                key=f"{topic}:{partition}:{synchronize_commit_group}".encode(),
                value=f"{offset + 1}".encode(),
            )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"
        consumer_a = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(
            consumer_a,
            [lambda message: assignments_received[consumer_a], collect_messages_received(4)],
            10,
        )

        assert (
            len(assignments_received[consumer_a]) == 1
        ), "expected to receive partition assignment"
        assert {(i.topic, i.partition) for i in assignments_received[consumer_a][0]} == {
            (topic, 0),
            (topic, 1),
        }
        assignments_received[consumer_a].pop()

        message = consumer_a.poll(1)
        assert (
            message is None or message.error() is KafkaError._PARTITION_EOF
        ), "there should be no more messages to receive"

        consumer_b = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(
            consumer_a, [lambda message: assignments_received[consumer_a]], 10
        )

        consume_until_constraints_met(
            consumer_b,
            [lambda message: assignments_received[consumer_b], collect_messages_received(2)],
            10,
        )

        for consumer in [consumer_a, consumer_b]:
            assert len(assignments_received[consumer][0]) == 1

        message = consumer_a.poll(1)
        assert (
            message is None or message.error() is KafkaError._PARTITION_EOF
        ), "there should be no more messages to receive"

        message = consumer_b.poll(1)
        assert (
            message is None or message.error() is KafkaError._PARTITION_EOF
        ), "there should be no more messages to receive"
Example 10
#p.list_topics().topics
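
This snippet assumes a Producer instance p, a Faker generator fake, and json/time imports that are not shown in this excerpt; a hypothetical setup could be:

import json
import time

from confluent_kafka import Producer
from faker import Faker

fake = Faker()
p = Producer({'bootstrap.servers': 'localhost:9092'})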


def receipt(err, msg):
    if err is not None:
        print('Error: {}'.format(err))
    else:
        print(
            "{} : Message on topic {} on partition {} with value of {}".format(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(msg.timestamp()[1] / 1000)),
                msg.topic(), msg.partition(),
                msg.value().decode('utf-8')))


for i in range(10):
    data = {
        "name": fake.name(),
        "age": fake.random_int(min=18, max=80, step=1),
        "street": fake.street_address(),
        "city": fake.city(),
        "state": fake.state(),
        "zip": fake.zipcode()
    }
    m = json.dumps(data)
    p.poll(0)
    p.produce('users', m.encode('utf-8'), callback=receipt)

p.flush()
Example 11
    def acked(err, msg):
        if err is not None:
            print("Failed to deliver message: {0}: {1}".format(
                msg.value(), err.str()))
        else:
            print("Message produced: {0}".format(msg.value()))

    p = Producer({'bootstrap.servers': '192.168.1.107:9092'})

    try:
        host = '192.168.***.***'  #client/consumer ip
        port = 9092
        server = ('192.168.***.***', 9092)  #server/producer ip
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.bind((host, port))

        # while (1):
        #     message = input("-> ")
        #     s.sendto(message.encode('utf-8'), server)
        # s.close()

        tt = input()
        p.produce('testtopic', '{0}'.format(tt), callback=acked)
        p.poll(0.5)

    except KeyboardInterrupt:
        pass

    p.flush(30)
    s.close()
class KafkaTest:
    def __init__(self, kafkaAddress, schemaRegistryAddress,
                 kafkaConnectAddress, credentialPath, testVersion, enableSSL):
        self.testVersion = testVersion
        self.credentialPath = credentialPath
        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            testWarehouse = credentialJson["warehouse"]
            pk = credentialJson["encrypted_private_key"]
            pk_passphrase = credentialJson["private_key_passphrase"]

        self.TEST_DATA_FOLDER = "./test_data/"
        self.httpHeader = {
            'Content-type': 'application/json',
            'Accept': 'application/json'
        }

        self.SEND_INTERVAL = 0.01  # send a record every 10 ms
        self.VERIFY_INTERVAL = 60  # verify every 60 secs
        self.MAX_RETRY = 120  # max wait time 120 mins
        self.MAX_FLUSH_BUFFER_SIZE = 5000  # flush the producer once 5000 records are queued

        self.kafkaConnectAddress = kafkaConnectAddress
        self.schemaRegistryAddress = schemaRegistryAddress
        self.kafkaAddress = kafkaAddress

        if enableSSL:
            print(datetime.now().strftime("\n%H:%M:%S "), "=== Enable SSL ===")
            self.client_config = {
                "bootstrap.servers": kafkaAddress,
                "security.protocol": "SASL_SSL",
                "ssl.ca.location": "./crts/ca-cert",
                "sasl.mechanism": "PLAIN",
                "sasl.username": "******",
                "sasl.password": "******"
            }
        else:
            self.client_config = {"bootstrap.servers": kafkaAddress}

        self.adminClient = AdminClient(self.client_config)
        self.producer = Producer(self.client_config)
        sc_config = self.client_config
        sc_config['schema.registry.url'] = schemaRegistryAddress
        self.avroProducer = AvroProducer(sc_config)

        reg = "[^\/]*snowflakecomputing"  # find the account name
        account = re.findall(reg, testHost)
        if len(account) != 1 or len(account[0]) < 20:
            print(
                datetime.now().strftime("%H:%M:%S "),
                "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
            )

        pkb = parsePrivateKey(pk, pk_passphrase)
        self.snowflake_conn = snowflake.connector.connect(
            user=testUser,
            private_key=pkb,
            account=account[0][:-19],
            warehouse=testWarehouse,
            database=testDatabase,
            schema=testSchema)

    def msgSendInterval(self):
        # sleep for self.SEND_INTERVAL before sending the next message
        sleep(self.SEND_INTERVAL)

    def startConnectorWaitTime(self):
        sleep(10)

    def verifyWaitTime(self):
        # sleep for self.VERIFY_INTERVAL before verifying the result in the Snowflake DB
        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Sleep {} secs before verify result in Snowflake DB ===".
              format(self.VERIFY_INTERVAL),
              flush=True)
        sleep(self.VERIFY_INTERVAL)

    def verifyWithRetry(self, func, round):
        retryNum = 0
        while retryNum < self.MAX_RETRY:
            try:
                func(round)
                break
            except test_suit.test_utils.ResetAndRetry:
                retryNum = 0
                print(datetime.now().strftime("%H:%M:%S "),
                      "=== Reset retry count and retry ===",
                      flush=True)
            except test_suit.test_utils.RetryableError as e:
                retryNum += 1
                print(datetime.now().strftime("%H:%M:%S "),
                      "=== Failed, retryable. {}===".format(e.msg),
                      flush=True)
                self.verifyWaitTime()
            except test_suit.test_utils.NonRetryableError as e:
                print(datetime.now().strftime("\n%H:%M:%S "),
                      "=== Non retryable error raised ===\n{}".format(e.msg),
                      flush=True)
                raise test_suit.test_utils.NonRetryableError()
            except snowflake.connector.errors.ProgrammingError as e:
                if e.errno == 2003:
                    retryNum += 1
                    print(datetime.now().strftime("%H:%M:%S "),
                          "=== Failed, table not created ===",
                          flush=True)
                    self.verifyWaitTime()
                else:
                    raise
        if retryNum == self.MAX_RETRY:
            print(datetime.now().strftime("\n%H:%M:%S "),
                  "=== Max retry exceeded ===",
                  flush=True)
            raise test_suit.test_utils.NonRetryableError()

    def createTopics(self, topicName, partitionNum=1, replicationNum=1):
        self.adminClient.create_topics(
            [NewTopic(topicName, partitionNum, replicationNum)])

    def sendBytesData(self, topic, value, key=[], partition=0, headers=[]):
        if len(key) == 0:
            for i, v in enumerate(value):
                self.producer.produce(topic,
                                      value=v,
                                      partition=partition,
                                      headers=headers)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value)):
                self.producer.produce(topic,
                                      value=v,
                                      key=k,
                                      partition=partition,
                                      headers=headers)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        self.producer.flush()

    def sendAvroSRData(self,
                       topic,
                       value,
                       value_schema,
                       key=[],
                       key_schema="",
                       partition=0):
        if len(key) == 0:
            for i, v in enumerate(value):
                self.avroProducer.produce(topic=topic,
                                          value=v,
                                          value_schema=value_schema,
                                          partition=partition)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.avroProducer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value)):
                self.avroProducer.produce(topic=topic,
                                          value=v,
                                          value_schema=value_schema,
                                          key=k,
                                          key_schema=key_schema,
                                          partition=partition)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.avroProducer.flush()
        self.avroProducer.flush()

    def cleanTableStagePipe(self,
                            connectorName,
                            topicName="",
                            partitionNumber=1):
        if topicName == "":
            topicName = connectorName
        tableName = topicName
        stageName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_STAGE_{}".format(
            connectorName, topicName)

        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Drop table {} ===".format(tableName))
        self.snowflake_conn.cursor().execute(
            "DROP table IF EXISTS {}".format(tableName))

        print(datetime.now().strftime("%H:%M:%S "),
              "=== Drop stage {} ===".format(stageName))
        self.snowflake_conn.cursor().execute(
            "DROP stage IF EXISTS {}".format(stageName))

        for p in range(partitionNumber):
            pipeName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_PIPE_{}_{}".format(
                connectorName, topicName, p)
            print(datetime.now().strftime("%H:%M:%S "),
                  "=== Drop pipe {} ===".format(pipeName))
            self.snowflake_conn.cursor().execute(
                "DROP pipe IF EXISTS {}".format(pipeName))

        print(datetime.now().strftime("%H:%M:%S "), "=== Done ===", flush=True)

    def verifyStageIsCleaned(self, connectorName, topicName=""):
        if topicName == "":
            topicName = connectorName
        stageName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_STAGE_{}".format(
            connectorName, topicName)

        res = self.snowflake_conn.cursor().execute(
            "list @{}".format(stageName)).fetchone()
        if res is not None:
            raise RetryableError("stage not cleaned up ")

    # validate that the content matches the gold regex
    def regexMatchOneLine(self, res, goldMetaRegex, goldContentRegex):
        meta = res[0].replace(" ", "").replace("\n", "")
        content = res[1].replace(" ", "").replace("\n", "")
        goldMetaRegex = "^" + goldMetaRegex.replace("\"", "\\\"").replace("{", "\\{").replace("}", "\\}") \
            .replace("[", "\\[").replace("]", "\\]").replace("+", "\\+") + "$"
        goldContentRegex = "^" + goldContentRegex.replace("\"", "\\\"").replace("{", "\\{").replace("}", "\\}") \
            .replace("[", "\\[").replace("]", "\\]").replace("+", "\\+") + "$"
        if re.search(goldMetaRegex, meta) is None:
            raise test_suit.test_utils.NonRetryableError(
                "Record meta data:\n{}\ndoes not match gold regex "
                "label:\n{}".format(meta, goldMetaRegex))
        if re.search(goldContentRegex, content) is None:
            raise test_suit.test_utils.NonRetryableError(
                "Record content:\n{}\ndoes not match gold regex "
                "label:\n{}".format(content, goldContentRegex))

    def updateConnectorConfig(self, fileName, connectorName, configMap):
        with open('./rest_request_generated/' + fileName + '.json') as f:
            c = json.load(f)
            config = c['config']
            for k in configMap:
                config[k] = configMap[k]
        requestURL = "http://{}/connectors/{}/config".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, json=config, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r,
              " updated connector config")

    def restartConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}/restart".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.post(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " restart connector")

    def pauseConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}/pause".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " pause connector")

    def resumeConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}/resume".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " resume connector")

    def deleteConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}".format(self.kafkaConnectAddress,
                                                      connectorName)
        r = requests.delete(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " delete connector")

    def closeConnector(self, fileName, nameSalt):
        snowflake_connector_name = fileName.split(".")[0] + nameSalt
        delete_url = "http://{}/connectors/{}".format(
            self.kafkaConnectAddress, snowflake_connector_name)
        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Delete connector {} ===".format(snowflake_connector_name))
        code = requests.delete(delete_url, timeout=10).status_code
        print(datetime.now().strftime("%H:%M:%S "), code)

    def createConnector(self, fileName, nameSalt):
        rest_template_path = "./rest_request_template"
        rest_generate_path = "./rest_request_generated"

        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            pk = credentialJson["private_key"]

        print(
            datetime.now().strftime("\n%H:%M:%S "),
            "=== generate sink connector rest reqeuest from {} ===".format(
                rest_template_path))
        if not os.path.exists(rest_generate_path):
            os.makedirs(rest_generate_path)
        snowflake_connector_name = fileName.split(".")[0] + nameSalt

        print(
            datetime.now().strftime("\n%H:%M:%S "),
            "=== Connector Config JSON: {}, Connector Name: {} ===".format(
                fileName, snowflake_connector_name))
        with open("{}/{}".format(rest_template_path, fileName), 'r') as f:
            config = f.read() \
                .replace("SNOWFLAKE_PRIVATE_KEY", pk) \
                .replace("SNOWFLAKE_HOST", testHost) \
                .replace("SNOWFLAKE_USER", testUser) \
                .replace("SNOWFLAKE_DATABASE", testDatabase) \
                .replace("SNOWFLAKE_SCHEMA", testSchema) \
                .replace("CONFLUENT_SCHEMA_REGISTRY", self.schemaRegistryAddress) \
                .replace("SNOWFLAKE_TEST_TOPIC", snowflake_connector_name) \
                .replace("SNOWFLAKE_CONNECTOR_NAME", snowflake_connector_name)
            with open("{}/{}".format(rest_generate_path, fileName), 'w') as fw:
                fw.write(config)

        MAX_RETRY = 20
        retry = 0
        delete_url = "http://{}/connectors/{}".format(
            self.kafkaConnectAddress, snowflake_connector_name)
        post_url = "http://{}/connectors".format(self.kafkaConnectAddress)
        while retry < MAX_RETRY:
            try:
                code = requests.delete(delete_url, timeout=10).status_code
                if code == 404 or code == 200 or code == 201:
                    break
            except Exception:
                pass
            print(
                datetime.now().strftime("\n%H:%M:%S "),
                "=== sleep for 30 secs to wait for kafka connect to accept connection ==="
            )
            sleep(30)
            retry += 1
        if retry == MAX_RETRY:
            errorExit(
                "\n=== max retry exceeded, kafka connect not ready in 10 mins ==="
            )

        r = requests.post(post_url,
                          json=json.loads(config),
                          headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "),
              json.loads(r.content.decode("utf-8"))["name"], r.status_code)
Example 13
class Uploader():
    def __init__(self, host, port, api_key, experiment, run_id):

        # Store some variables
        self.host = host
        self.port = port
        self.experiment = experiment
        self.run_id = run_id
        self.api_key = api_key
        self.rank_id = ''.join(
            random.choice(string.ascii_lowercase) for i in range(8))

        # Connect to the Kafka broker
        self.kafka_producer = Producer({
            'bootstrap.servers':
            self.host + ':' + str(self.port),
            'sasl.username':
            '******',
            'sasl.password':
            api_key,
            'security.protocol':
            'sasl_plaintext',
            'sasl.mechanism':
            'PLAIN',
        })

        # Announce the run
        announcement = {
            'type': MType.ANNOUNCE_CREATE.value,
            'experiment': self.experiment,
            'run_id': self.run_id,
            'rank_id': self.rank_id,
        }
        self.kafka_producer.produce('announce',
                                    key=str(time.time()),
                                    value=msgpack.packb(announcement))
        self.kafka_producer.flush(30)

        # Register the at_exit death call
        atexit.register(self.cleanup)

    def __call__(self, frame):
        # Publish the frame on the topic
        self.kafka_producer.produce(str(self.run_id),
                                    key=str(time.time()),
                                    value=msgpack.packb(frame))

        return 200

    def flush(self, timeout=10):
        self.kafka_producer.flush(timeout)

    def cleanup(self, ):
        announcement = {
            'type': MType.ANNOUNCE_DIE.value,
            'experiment': self.experiment,
            'run_id': self.run_id,
            'rank_id': self.rank_id,
        }
        self.kafka_producer.produce('announce',
                                    key=str(time.time()),
                                    value=msgpack.packb(announcement))
        self.kafka_producer.flush()
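
A hypothetical usage of the Uploader above; the host, port, API key, and identifiers are illustrative:

uploader = Uploader('broker.example.org', 9092, 'my-api-key', 'experiment-1', 'run-42')
uploader({'step': 1, 'loss': 0.5})  # publish one frame to the run topic
uploader.flush()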
Example 14
class KafkaHelper(object):
    def __init__(self, target_landscape='custom', config_section=None):
        self.kafka_config = {
            'bootstrap.servers': svt.conf.get('kafka', 'bootstrap.servers')
        }
        self.producer = None
        log.info("Initialized new KafkaHelper object with config: "
                 f"{self.kafka_config}")

    def publish(self, topic: str, message: Union[dict, str]) -> None:
        """Posts the passed message to the target Kafka topic.

        :param topic: Identifier of the target topic
        :param message: Message in a dictionary or string format
        """
        assert isinstance(message, str) or isinstance(message, dict)

        if not self.producer:
            self.producer = Producer(self.kafka_config)

        if isinstance(message, dict):
            message = json.dumps(message)
        # Asynchronous message producing
        self.producer.produce(topic, message.encode('utf-8'))
        self.producer.flush()
        log.info(f"Posted a document to kafka topic: {topic}")

    def consume_forever(self, group_id: str, topics: List[str],
                        callback_functions: List[Callable]) -> None:
        """

        :param group_id:
        :param topics:
        :param callback_functions:
        :return:
        """
        assert len(topics) == len(callback_functions)
        callbacks = dict(zip(topics, callback_functions))
        self.kafka_config.update({
            'group.id': group_id,
            'auto.offset.reset': 'earliest'
        })
        c = Consumer(self.kafka_config)
        c.subscribe(topics)
        # Read messages
        try:
            while True:
                msg = c.poll(timeout=1.0)
                if not msg:
                    log.info(
                        "There was no message on the subscribed Kafka topics!")
                elif msg.error():
                    raise KafkaException(msg.error())
                else:
                    message = json.loads(msg.value().decode('utf-8'))
                    callbacks[msg.topic()](message)

        except Exception as error:
            log.error(
                f"Unexpected event occurred! Error: {traceback.format_exc()}")
        finally:
            # Shut down the consumer to commit the current offsets
            c.close()
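
A hypothetical wiring of the helper above, assuming the svt configuration already provides bootstrap.servers; topic names and callbacks are illustrative:

helper = KafkaHelper()
helper.publish('alerts', {'severity': 'high'})
helper.consume_forever(
    group_id='alert-handlers',
    topics=['alerts'],
    callback_functions=[lambda message: log.info(f"handled: {message}")],
)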
Example 15
class CompetitionProducer:
    """

    """
    daemon = True
    producer = None

    def __init__(self, server):
        conf = {'bootstrap.servers': server}
        self.producer = Producer(conf)  # Create producer

    # message must be in byte format
    def send(self, topic, message):
        self.producer.produce(topic, message)  # Sending messages to a certain topic
        self.producer.poll(timeout=0)

    def main(self, topic, initial_batch, items, predictions, initial_training_time, batch_size, time_interval,
             predictions_time_interval, spark_topic, competition_id):

        """
        Recreates the stream. Sends the data in batches: first test (without the target value) and then train batches.
        All batches are sent according to the time intervals set for the current competition.

        :param topic: Kafka topic to which the test and train batches are sent.
        :param initial_batch: Records sent before the initial training period starts.
        :param items: Test records (without the target value).
        :param predictions: Training records, including the target values.
        :param initial_training_time: Seconds to sleep after sending the initial batch.
        :param batch_size: Number of records per batch.
        :param time_interval: Seconds to wait between batches.
        :param predictions_time_interval: Seconds until the prediction deadline of a batch.
        :param spark_topic: Separate topic to which the training records are also sent.
        :param competition_id: Identifier of the current competition.
        :return:
        """

        for item in initial_batch:
            try:
                # Send row by row from initial batch as json
                self.send(topic, orjson.dumps(item))
            except Exception as e:
                # Check if topic exists, if not, create it and then send
                print(e)

        # After sending initial batch, sleep for initial training time
        time.sleep(int(initial_training_time))
        # Create batch-sized lists: one with test items (values only) and one with the predictions used for training
        test_groups = list(self.chunker(items, batch_size))
        train_groups = list(self.chunker(predictions, batch_size))

        i = -1

        # Accessing each group in the list test_groups
        for group in test_groups:
            # In parallel accessing the predictions
            # Adding tag, deadline and released at to every item in train group / prediction
            released_at = datetime.datetime.now()
            # for item in test group add tag, deadline and released
            for item in group:
                item['tag'] = 'TEST'
                item['Deadline'] = str(released_at + datetime.timedelta(seconds=int(predictions_time_interval)))
                item['Released'] = str(released_at)
                item['competition_id'] = str(competition_id)
                # Sending testing items
                try:
                    self.send(topic, orjson.dumps(item))

                except Exception as e:
                    print(e)

            i = i + 1
            train_group = train_groups[i]
            for item in train_group:
                deadline = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                item['Deadline'] = deadline.strftime("%Y-%m-%d %H:%M:%S")
                item['Released'] = released_at.strftime("%Y-%m-%d %H:%M:%S")
                item['competition_id'] = competition_id
                try:
                    self.send(spark_topic, orjson.dumps(item))
                except Exception as e:
                    print(e)

            time.sleep(time_interval)

            for item in train_group:
                item['tag'] = 'TRAIN'
                item['Deadline'] = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                item['Released'] = released_at
                try:
                    self.send(topic, orjson.dumps(item, default=json_util.default))
                except Exception as e:
                    print(e)

        time.sleep(time_interval)

        self.producer.flush()

    @staticmethod
    def chunker(seq, size):
        """ Returns data in chunks (batches) of a given size. """
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    @staticmethod
    def is_not_empty(row):
        """Check if row is empty."""
        return all(item == "" for item in row)


    def create_competition(self, competition, items, predictions, initial_batch):
        """Create a competition and start releasing the data stream."""
        self.main(
            topic=competition.name.lower().replace(" ", ""),
            initial_training_time=competition.initial_training_time,
            initial_batch=initial_batch,
            items=items,
            predictions=predictions,
            batch_size=competition.batch_size,
            time_interval=competition.time_interval,
            predictions_time_interval=competition.predictions_time_interval,
            spark_topic=competition.name.lower().replace(" ", "") + 'spark_train',
            competition_id=competition.competition_id)
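
A small illustration of the chunker helper used above:

batches = list(CompetitionProducer.chunker(list(range(5)), 2))
# batches == [[0, 1], [2, 3], [4]]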
Example 16
class KafkaStreamingClient(AbstractStreamingClient):
    """Kafka streaming client."""
    def __init__(self, config):  # pragma: no cover
        """
        Streaming client implementation based on Kafka.

        Configuration keys:
          KAFKA_ADDRESS
          KAFKA_CONSUMER_GROUP
          KAFKA_TOPIC
          TIMEOUT
          EVENTHUB_KAFKA_CONNECTION_STRING
        """
        self.logger = Logger()

        self.topic = config.get("KAFKA_TOPIC")
        if config.get("TIMEOUT"):
            try:
                self.timeout = int(config.get("TIMEOUT"))
            except ValueError:
                self.timeout = None
        else:
            self.timeout = None

        kafka_config = self.create_kafka_config(config)
        self.admin = admin.AdminClient(kafka_config)

        if config.get("KAFKA_CONSUMER_GROUP") is None:
            self.logger.info('Creating Producer')
            self.producer = Producer(kafka_config)
        else:
            self.logger.info('Creating Consumer')
            self.consumer = Consumer(kafka_config)

    @staticmethod
    def create_kafka_config(user_config: dict) -> dict:  # pragma: no cover
        """Create the kafka configuration."""
        config = {
            "bootstrap.servers": user_config.get("KAFKA_ADDRESS"),
            "enable.auto.commit": False,
            "auto.offset.reset": "earliest",
            "default.topic.config": {
                'auto.offset.reset': 'smallest'
            },
        }

        if user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'):
            ssl_location = user_config.get(
                'SSL_CERT_LOCATION') or '/etc/ssl/certs/ca-certificates.crt'
            eventhub_config = {
                'security.protocol':
                "SASL_SSL",
                'sasl.mechanism':
                "PLAIN",
                'ssl.ca.location':
                ssl_location,
                'sasl.username':
                '******',
                'sasl.password':
                user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'),
                'client.id':
                'agogosml',
            }
            config = {**config, **eventhub_config}

        if user_config.get('KAFKA_CONSUMER_GROUP') is not None:
            config['group.id'] = user_config['KAFKA_CONSUMER_GROUP']

        if user_config.get('KAFKA_DEBUG') is not None:
            config['debug'] = user_config['KAFKA_DEBUG']

        return config

    def delivery_report(self, err, msg):  # pragma: no cover
        """
        Indicate delivery result.

        Called once for each message produced. Triggered by poll() or flush().

        :param err: Delivery error, or None if the message was delivered.
        :param msg: The produced Message, including topic and partition metadata.
        """
        if err is not None:
            self.logger.error('Message delivery failed: %s', err)
        else:
            self.logger.info('Message delivered to %s [%s]', msg.topic(),
                             msg.partition())

    def send(self, message: str):  # pragma: no cover
        if not isinstance(message, str):
            raise TypeError('str type expected for message')
        try:
            mutated_message = message.encode('utf-8')
            self.logger.info('Sending message to kafka topic: %s', self.topic)
            self.producer.poll(0)
            self.producer.produce(self.topic,
                                  mutated_message,
                                  callback=self.delivery_report)
            self.producer.flush()
            return True
        except Exception as ex:
            self.logger.error('Error sending message to kafka: %s', ex)
            return False

    def stop(self):  # pragma: no cover
        pass

    def check_timeout(self, start: datetime):  # pragma: no cover
        """Interrupts if too much time has elapsed since the kafka client started running."""
        if self.timeout is not None:
            elapsed = datetime.now() - start
            if elapsed.seconds >= self.timeout:
                raise KeyboardInterrupt

    def handle_kafka_error(self, msg):  # pragma: no cover
        """Handle an error in kafka."""
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            self.logger.info('%% %s [%d] reached end at offset %d\n',
                             msg.topic(), msg.partition(), msg.offset())
        else:
            # Error
            raise KafkaException(msg.error())

    def start_receiving(self,
                        on_message_received_callback):  # pragma: no cover
        try:
            self.subscribe_to_topic()
            start = datetime.now()

            while True:
                # Stop loop after timeout if exists
                self.check_timeout(start)

                # Poll messages from topic
                msg = self.read_single_message()
                if msg is not None:
                    on_message_received_callback(msg)

        except KeyboardInterrupt:
            self.logger.info('Aborting listener...')

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()

    def subscribe_to_topic(self):  # pragma: no cover
        """Subscribe to topic."""
        self.consumer.subscribe([self.topic])

    def read_single_message(self):  # pragma: no cover
        """Poll messages from topic."""
        msg = self.consumer.poll(0.000001)

        if msg is None:
            return None

        if msg.error():
            # Error or event
            self.handle_kafka_error(msg)
            return None

        # Proper message
        # self.logger.info('kafka read message: %s, from topic: %s', msg.value(), msg.topic())
        self.consumer.commit(msg)
        return msg.value()
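
A hypothetical usage of the streaming client above, following the configuration keys listed in __init__; addresses and topic names are illustrative:

producer_client = KafkaStreamingClient({
    "KAFKA_ADDRESS": "localhost:9092",
    "KAFKA_TOPIC": "events",
})
producer_client.send("hello world")

consumer_client = KafkaStreamingClient({
    "KAFKA_ADDRESS": "localhost:9092",
    "KAFKA_TOPIC": "events",
    "KAFKA_CONSUMER_GROUP": "event-handlers",
    "TIMEOUT": "60",
})
consumer_client.start_receiving(print)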
Example 17
def test_consumer_start_from_committed_offset():
    consumer_group = "consumer-{}".format(uuid.uuid1().hex)
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, "{}".format(i).encode("utf8"))

        assert producer.flush(
            5) == 0, "producer did not successfully flush queue"

        Consumer({
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "group.id": consumer_group
        }).commit(message=messages_delivered[topic][0], asynchronous=False)

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            bootstrap_servers=os.environ["SENTRY_KAFKA_HOSTS"],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in range(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received
                   ) == 1, "expected to receive partition assignment"
        assert set((i.topic, i.partition)
                   for i in assignments_received[0]) == set([(topic, 0)])

        # TODO: Make sure that all partitions are paused on assignment.

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key="{}:{}:{}".format(message.topic(), message.partition(),
                                  synchronize_commit_group).encode("utf8"),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0 + 1]  # second message
        producer.produce(
            commit_log_topic,
            key="{}:{}:{}".format(message.topic(), message.partition(),
                                  synchronize_commit_group).encode("utf8"),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        assert producer.flush(
            5) == 0, "producer did not successfully flush queue"

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?)
        for i in range(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = messages_delivered[topic][0 + 1]  # second message
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
Example 18
class KafkaProducer(GenericProducer):
    """Kafka Single Topic Producer.

    Parameters
    ----------
    PARAMS: dict
        Parameters passed to :class:`confluent_kafka.Producer`

        The required parameters are:

        - *bootstrap.servers*: comma separated <host:port> :class:`string` to brokers.

    TOPIC: string
        Kafka fixed output topic.

        *Example:*

        Depending on the step configuration, the producer config can be passed in different ways; the recommended one
        is passing it on the `STEP_CONFIG` variable.

        .. code-block:: python

            #settings.py
            PRODUCER_CONFIG = {
                "PARAMS": {
                    "bootstrap.servers": "kafka1:9092, kafka2:9092",
                },
                "TOPIC": "test_topic"
            }

            STEP_CONFIG = { ...
                "PRODUCER_CONFIG": PRODUCER_CONFIG
            }

        If multiple producers are required, the variable inside `STEP_CONFIG` can be changed to "PRODUCER1_CONFIG", "PRODUCER2_CONFIG", etc.

    TOPIC_STRATEGY: dict

        Using a topic strategy instead of a fixed topic. Similar to the consumers topic strategy, the required parameters are:

        - *CLASS*: `apf.core.topic_management.GenericTopicStrategy` class to be used.
        - *PARAMS*: Parameters passed to *CLASS* object.

        **Example:**

        Produce to a topic that updates on 23 hours UTC every day.

        .. code-block:: python

            #settings.py
            PRODUCER_CONFIG = { ...
                "TOPIC_STRATEGY": {
                    "CLASS": "apf.core.topic_management.DailyTopicStrategy",
                    "PARAMS": {
                        "topic_format": "test_%s",
                        "date_format": "%Y%m%d",
                        "change_hour": 23
                    }
                }
            }

            STEP_CONFIG = { ...
                "PRODUCER_CONFIG": PRODUCER_CONFIG
            }

    SCHEMA: dict
        AVRO Output Schema `(AVRO Schema Definition) <https://avro.apache.org/docs/current/gettingstartedpython.html#Defining+a+schema>`_

        **Example:**

        .. code-block:: python

            #settings.py
            PRODUCER_CONFIG = { ...
                "SCHEMA": {
                    "namespace": "example.avro",
                    "type": "record",
                    "name": "User",
                    "fields": [
                        {"name": "name", "type": "string"},
                        {"name": "favorite_number",  "type": ["int", "null"]},
                        {"name": "favorite_color", "type": ["string", "null"]}
                    ]
                }
            }
    """
    def __init__(self,config):
        super().__init__(config=config)
        self.producer = Producer(self.config["PARAMS"])
        self.schema = self.config["SCHEMA"]

        self.schema = fastavro.parse_schema(self.schema)

        self.dynamic_topic = False
        if self.config.get("TOPIC"):
            self.logger.info(f'Producing to {self.config["TOPIC"]}')
            self.topic = [self.config["TOPIC"]]
        elif self.config.get("TOPIC_STRATEGY"):
            self.dynamic_topic = True
            module_name, class_name = self.config["TOPIC_STRATEGY"]["CLASS"].rsplit(".", 1)
            TopicStrategy = getattr(importlib.import_module(module_name), class_name)
            self.topic_strategy = TopicStrategy(**self.config["TOPIC_STRATEGY"]["PARAMS"])
            self.topic = self.topic_strategy.get_topic()
            self.logger.info(f'Using {self.config["TOPIC_STRATEGY"]}')
            self.logger.info(f'Producing to {self.topic}')
            self.consumer.subscribe(self.topic)

    def produce(self,message=None):
        """Produce Message to a topic.
        """
        out = io.BytesIO()
        fastavro.writer(out, self.schema, [message])
        avro_message = out.getvalue()

        if self.dynamic_topic:
            topics = self.topic_strategy.get_topic()
            if self.topic != topics:
                self.topic = topics

        for topic in self.topic:
            self.producer.produce(topic,avro_message)

    def __del__(self):
        self.logger.info("Waiting to produce last messages")
        self.producer.flush()
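
A hypothetical instantiation of the producer above, using the fixed-topic style of configuration shown in the docstring; broker addresses and the schema are illustrative:

producer_config = {
    "PARAMS": {"bootstrap.servers": "kafka1:9092, kafka2:9092"},
    "TOPIC": "test_topic",
    "SCHEMA": {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]},
        ],
    },
}
producer = KafkaProducer(config=producer_config)
producer.produce({"name": "Ada", "favorite_number": 7, "favorite_color": "green"})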
Example 19
    def test_ingester(self):

        init_db_sync(config=config, verbose=True)

        log("Setting up paths")
        # path_kafka = pathlib.Path(config["path"]["kafka"])

        path_logs = pathlib.Path(config["path"]["logs"])
        if not path_logs.exists():
            path_logs.mkdir(parents=True, exist_ok=True)

        if config["misc"]["broker"]:
            log("Setting up test groups and filters in Fritz")
            program = Program(group_name="FRITZ_TEST", group_nickname="test")
            Filter(
                collection="ZTF_alerts",
                group_id=program.group_id,
                filter_id=program.filter_id,
            )

            program2 = Program(group_name="FRITZ_TEST_AUTOSAVE", group_nickname="test2")
            Filter(
                collection="ZTF_alerts",
                group_id=program2.group_id,
                filter_id=program2.filter_id,
                autosave=True,
                pipeline=[{"$match": {"objectId": "ZTF20aaelulu"}}],
            )

            program3 = Program(
                group_name="FRITZ_TEST_UPDATE_ANNOTATIONS", group_nickname="test3"
            )
            Filter(
                collection="ZTF_alerts",
                group_id=program3.group_id,
                filter_id=program3.filter_id,
                update_annotations=True,
                pipeline=[
                    {"$match": {"objectId": "ZTF20aapcmur"}}
                ],  # there are 3 alerts in the test set for this oid
            )

        # clean up old Kafka logs
        log("Cleaning up Kafka logs")
        subprocess.run(["rm", "-rf", path_logs / "kafka-logs", "/tmp/zookeeper"])

        log("Starting up ZooKeeper at localhost:2181")

        # start ZooKeeper in the background
        cmd_zookeeper = [
            os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-start.sh"),
            "-daemon",
            os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
        ]

        with open(path_logs / "zookeeper.stdout", "w") as stdout_zookeeper:
            # p_zookeeper =
            subprocess.run(
                cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
            )

        # take a nap while it fires up
        time.sleep(3)

        log("Starting up Kafka Server at localhost:9092")

        # start the Kafka server:
        cmd_kafka_server = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-server-start.sh"),
            "-daemon",
            os.path.join(config["path"]["kafka"], "config", "server.properties"),
        ]

        with open(
            os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
        ) as stdout_kafka_server:
            # p_kafka_server = subprocess.Popen(cmd_kafka_server, stdout=stdout_kafka_server, stderr=subprocess.STDOUT)
            # p_kafka_server =
            subprocess.run(cmd_kafka_server)

        # take a nap while it fires up
        time.sleep(3)

        # get kafka topic names with kafka-topics command
        cmd_topics = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--zookeeper",
            config["kafka"]["zookeeper.test"],
            "-list",
        ]

        topics = (
            subprocess.run(cmd_topics, stdout=subprocess.PIPE)
            .stdout.decode("utf-8")
            .split("\n")[:-1]
        )
        log(f"Found topics: {topics}")

        # create a test ZTF topic for the current UTC date
        date = datetime.datetime.utcnow().strftime("%Y%m%d")
        topic_name = f"ztf_{date}_programid1_test"

        if topic_name in topics:
            # topic previously created? remove first
            cmd_remove_topic = [
                os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
                "--zookeeper",
                config["kafka"]["zookeeper.test"],
                "--delete",
                "--topic",
                topic_name,
            ]
            # print(kafka_cmd)
            remove_topic = (
                subprocess.run(cmd_remove_topic, stdout=subprocess.PIPE)
                .stdout.decode("utf-8")
                .split("\n")[:-1]
            )
            log(f"{remove_topic}")
            log(f"Removed topic: {topic_name}")
            time.sleep(1)

        if topic_name not in topics:
            log(f"Creating topic {topic_name}")

            cmd_create_topic = [
                os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
                "--create",
                "--bootstrap-server",
                config["kafka"]["bootstrap.test.servers"],
                "--replication-factor",
                "1",
                "--partitions",
                "1",
                "--topic",
                topic_name,
            ]
            with open(
                os.path.join(config["path"]["logs"], "create_topic.stdout"), "w"
            ) as stdout_create_topic:
                # p_create_topic = \
                subprocess.run(
                    cmd_create_topic,
                    stdout=stdout_create_topic,
                    stderr=subprocess.STDOUT,
                )
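        # Topic deletion/creation could likewise be done without the shell scripts,
        # e.g. with confluent_kafka's AdminClient (a sketch, not the original flow):
        #
        #   from confluent_kafka.admin import AdminClient, NewTopic
        #
        #   admin = AdminClient(
        #       {"bootstrap.servers": config["kafka"]["bootstrap.test.servers"]}
        #   )
        #   if topic_name in admin.list_topics(timeout=10).topics:
        #       admin.delete_topics([topic_name])[topic_name].result()
        #   admin.create_topics(
        #       [NewTopic(topic_name, num_partitions=1, replication_factor=1)]
        #   )[topic_name].result()  # raises if creation failed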

        log("Starting up Kafka Producer")

        # spin up Kafka producer
        producer = Producer(
            {"bootstrap.servers": config["kafka"]["bootstrap.test.servers"]}
        )

        # small number of alerts that come with kowalski
        path_alerts = pathlib.Path("/app/data/ztf_alerts/20200202/")
        # grab some more alerts from gs://ztf-fritz/sample-public-alerts
        try:
            log("Grabbing more alerts from gs://ztf-fritz/sample-public-alerts")
            r = requests.get("https://www.googleapis.com/storage/v1/b/ztf-fritz/o")
            aa = r.json()["items"]
            ids = [pathlib.Path(a["id"]).parent for a in aa if "avro" in a["id"]]
        except Exception as e:
            log(
                "Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed, but it is ok"
            )
            log(f"{e}")
            ids = []
        subprocess.run(
            [
                "gsutil",
                "-m",
                "cp",
                "-n",
                "gs://ztf-fritz/sample-public-alerts/*.avro",
                "/app/data/ztf_alerts/20200202/",
            ]
        )
        log(f"Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts")
        # push!
        for p in path_alerts.glob("*.avro"):
            with open(str(p), "rb") as data:
                # Trigger any available delivery report callbacks from previous produce() calls
                producer.poll(0)

                log(f"Pushing {p}")

                # Asynchronously produce a message, the delivery report callback
                # will be triggered from poll() above, or flush() below, when the message has
                # been successfully delivered or failed permanently.
                producer.produce(topic_name, data.read(), callback=delivery_report)

        # Wait for any outstanding messages to be delivered and delivery report
        # callbacks to be triggered.
        producer.flush()
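        # The delivery_report callback passed to produce() above is defined elsewhere
        # in the module; a typical delivery callback (a sketch, not necessarily the
        # original implementation) looks like:
        #
        #   def delivery_report(err, msg):
        #       if err is not None:
        #           log(f"Message delivery failed: {err}")
        #       else:
        #           log(f"Message delivered to {msg.topic()} [{msg.partition()}]")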

        log("Starting up Ingester")

        # digest and ingest
        watchdog(obs_date=date, test=True)
        log("Digested and ingested: all done!")

        # shut down Kafka server and ZooKeeper
        time.sleep(20)

        log("Shutting down Kafka Server at localhost:9092")
        # start the Kafka server:
        cmd_kafka_server_stop = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-server-stop.sh"),
            os.path.join(config["path"]["kafka"], "config", "server.properties"),
        ]

        with open(
            os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
        ) as stdout_kafka_server:
            # p_kafka_server_stop = \
            subprocess.run(
                cmd_kafka_server_stop,
                stdout=stdout_kafka_server,
                stderr=subprocess.STDOUT,
            )

        log("Shutting down ZooKeeper at localhost:2181")
        cmd_zookeeper_stop = [
            os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-stop.sh"),
            os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
        ]

        with open(
            os.path.join(config["path"]["logs"], "zookeeper.stdout"), "w"
        ) as stdout_zookeeper:
            # p_zookeeper_stop = \
            subprocess.run(
                cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
            )

        log("Checking the ZTF alert collection states")
        mongo = Mongo(
            host=config["database"]["host"],
            port=config["database"]["port"],
            replica_set=config["database"]["replica_set"],
            username=config["database"]["username"],
            password=config["database"]["password"],
            db=config["database"]["db"],
            verbose=True,
        )
        collection_alerts = config["database"]["collections"]["alerts_ztf"]
        collection_alerts_aux = config["database"]["collections"]["alerts_ztf_aux"]
        n_alerts = mongo.db[collection_alerts].count_documents({})
        assert n_alerts == 313
        n_alerts_aux = mongo.db[collection_alerts_aux].count_documents({})
        assert n_alerts_aux == 145

        if config["misc"]["broker"]:
            log("Checking that posting to SkyPortal succeeded")

            # check number of candidates that passed the first filter
            resp = requests.get(
                program.base_url + f"/api/candidates?groupIDs={program.group_id}",
                headers=program.headers,
                timeout=3,
            )

            assert resp.status_code == requests.codes.ok
            result = resp.json()
            assert result["status"] == "success"
            assert "data" in result
            assert "totalMatches" in result["data"]
            assert result["data"]["totalMatches"] == 88

            # check that the only candidate that passed the second filter (ZTF20aaelulu) got saved as Source
            resp = requests.get(
                program2.base_url + f"/api/sources?group_ids={program2.group_id}",
                headers=program2.headers,
                timeout=3,
            )

            assert resp.status_code == requests.codes.ok
            result = resp.json()
            assert result["status"] == "success"
            assert "data" in result
            assert "totalMatches" in result["data"]
            assert result["data"]["totalMatches"] == 1
            assert "sources" in result["data"]
            assert result["data"]["sources"][0]["id"] == "ZTF20aaelulu"
Esempio n. 20
0
    def send(self, message):
        p = Producer({'bootstrap.servers': settings.KAFKA['bootstrap.servers']})
        p.produce('CoinPrices', key='coin', value=message)
        p.flush(30)
Esempio n. 21
0
def producer_trigger(raw_data, context):
    state_stats_url = ('https://api.covid19india.org/data.json')
    district_stats_url = ('https://api.covid19india.org/v2/state_district_wise.json')
    bootstrap_servers = "localhost:9092"
    kafka_district_data_topic_name = "district-data"
    kafka_processed_data_topic_name = "processed-data"
   

    conf = {'bootstrap.servers': bootstrap_servers}

    producer = Producer(conf, logger=logger)

    # import raw district data
    district_data = requests.get(district_stats_url).json()
    for data in district_data:
        state = data['state']
        district_data = data['districtData']
        for dd in district_data:
            district = dd['district']
            key = dict({'state': state, 'district': district})
            value = dict({'state': state, 'district': district, 'active': dd['active'], 'confirmed': dd['confirmed'],
                          'recovered': dd['recovered'], 'deceased': dd['deceased'],
                          'deltaConfirmed': dd['delta']['confirmed'],
                          'deltaRecovered': dd['delta']['recovered'], 'deltaDeceased': dd['delta']['deceased'],
                          'notes': dd['notes']
                          })
            try:
                producer.produce(topic=kafka_district_data_topic_name, value=json.dumps(value), key=json.dumps(key),
                                 on_delivery=fail)
            except BufferError:
                logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                             len(producer))
            producer.poll(0)
    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()

    district_data = requests.get(district_stats_url).json()
    for data in district_data:
        state = data['state']
        district_data = data['districtData']
        finalDict = {}
        for dd in district_data:
            district = dd['district']
            key = dict({'state': state, 'district': district})
            if dd['active'] < 200:
                finalDict.update({'low_risk_zone': {
                    'state': state, 'district': district, 'active': dd['active'],
                    'confirmed': dd['confirmed'], 'recovered': dd['recovered'],
                    'deceased': dd['deceased'],
                    'deltaConfirmed': dd['delta']['confirmed'],
                    'deltaRecovered': dd['delta']['recovered'],
                    'deltaDeceased': dd['delta']['deceased'],
                    'notes': dd['notes']}})
            elif 200 <= dd['active'] < 800:
                finalDict.update({'moderate_risk_zone': {
                    'state': state, 'district': district, 'active': dd['active'],
                    'confirmed': dd['confirmed'], 'recovered': dd['recovered'],
                    'deceased': dd['deceased'],
                    'deltaConfirmed': dd['delta']['confirmed'],
                    'deltaRecovered': dd['delta']['recovered'],
                    'deltaDeceased': dd['delta']['deceased'],
                    'notes': dd['notes']}})
            else:  # dd['active'] >= 800
                finalDict.update({'high_risk_zone': {
                    'state': state, 'district': district, 'active': dd['active'],
                    'confirmed': dd['confirmed'], 'recovered': dd['recovered'],
                    'deceased': dd['deceased'],
                    'deltaConfirmed': dd['delta']['confirmed'],
                    'deltaRecovered': dd['delta']['recovered'],
                    'deltaDeceased': dd['delta']['deceased'],
                    'notes': dd['notes']}})
            try:
                producer.produce(topic=kafka_processed_data_topic_name, value=json.dumps(finalDict), key=json.dumps(key),
                                 on_delivery=fail)
            except BufferError:
                logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                             len(producer))
            producer.poll(0)
    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()
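# The `logger` and the `fail` delivery handler referenced above are defined elsewhere
# in the original module; a minimal sketch of what they might look like (hypothetical,
# inferred only from how they are called):
#
#   import logging
#   logger = logging.getLogger(__name__)
#
#   def fail(err, msg):
#       # on_delivery callback: err is set when delivery failed permanently
#       if err is not None:
#           logger.error('Delivery failed for key %s: %s', msg.key(), err)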
Esempio n. 22
0
import pyodbc
from confluent_kafka import Producer

print ('---login--- ')

conn = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};SERVER=DevSQL01;DATABASE=HackStream;UID=greatscott;PWD=H4ppyFunB4ll;")

cursor = conn.cursor()
cursor.execute('SELECT MAX(ActionId) FROM HackStream.dbo.Funnel')

for row in cursor:
    print (row)
print ('--end--')


p = Producer({'bootstrap.servers': '172.16.43.33:9092'})
p.produce('Funnel', key='hello', value='world')
p.flush(10)

Esempio n. 23
0
class DocManager(DocManagerBase):
    """ DocManager that echoes MongoDB Oplog to Kafka.
    """
    _topic_prefix = 'db.mongo.'

    def __init__(self, url, **kwargs):
        """ Sets up producer connection to Kafka.

        Parameters
        ----------
        url : str
            Directly corresponds to the "bootstrap.servers" config when initializing a Kafka entity
        """
        self.producer = Producer({'bootstrap.servers': url})

    def commit(self):
        self.producer.flush()

    def get_last_doc(self):
        """ TODO: For now, this returns nothing.
        """
        pass

    def remove(self, document_id, namespace, timestamp):
        """ Sends a remove message to the corresponding kafka topic.

        Parameters
        ----------
        document_id : str
        namespace : str
        timestamp : bson.timestamp.Timestamp
        """
        msg_topic = self._get_topic(namespace)
        msg_key = document_id
        msg_val = json_dumps({
            'op': 'remove',
            'o': document_id,
            'ts': timestamp,
        })

        return self._produce(msg_topic, msg_key, msg_val)

    def search(self, start_ts, end_ts):
        """ TODO: For now, this returns an empty iterator.
        """
        return iter([])

    def stop(self):
        self.producer.flush()

    def update(self, document_id, update_spec, namespace, timestamp):
        """ Sends an update message to the corresponding kafka topic.

        Parameters
        ----------
        document_id : str
        update_spec : dict
        namespace : str
        timestamp : bson.timestamp.Timestamp
        """
        msg_topic = self._get_topic(namespace)
        msg_key = document_id
        msg_val = json_dumps({
            'op': 'update',
            'o': update_spec,
            'o2': document_id,
            'ts': timestamp,
        })

        return self._produce(msg_topic, msg_key, msg_val)

    def upsert(self, document, namespace, timestamp):
        """ Sends an upsert message to the corresponding kafka topic.

        Parameters
        ----------
        document : dict
        namespace : str
        timestamp : bson.timestamp.Timestamp
        """
        msg_topic = self._get_topic(namespace)
        msg_key = document['_id']
        msg_val = json_dumps({
            'op': 'upsert',
            'o': document,
            'ts': timestamp,
        })

        return self._produce(msg_topic, msg_key, msg_val)

    def _produce(self, topic, key, value):
        """ Helper method for producing to Kafka.
        """
        return self.producer.produce(topic=topic,
                                     key=key,
                                     value=value,
                                     callback=self._delivery_report)

    @staticmethod
    def _get_topic(namespace):
        """ Returns a Kafka topic name based on given parameters.

        Parameters
        ----------
        namespace : str
        """
        # _topic_prefix already ends with '.', so join without an extra separator.
        return '{}{}'.format(DocManager._topic_prefix, namespace)

    @staticmethod
    def _delivery_report(err, msg):
        if err is None:
            LOG.info('Message with key {} produced to topic {}: {}'.format(
                msg.key(), msg.topic(), msg.value()))
        else:
            LOG.error(
                'Error while delivering message with key {} to topic {}, with value {}:\n{}'
                .format(msg.key(), msg.topic(), msg.value(), err.str()))
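# A minimal usage sketch for the DocManager above (hypothetical broker address,
# namespace and document; assumes bson is available, as in mongo-connector setups):
#
#   from bson.timestamp import Timestamp
#
#   dm = DocManager('localhost:9092')
#   dm.upsert({'_id': '42', 'name': 'example'}, 'mydb.mycollection', Timestamp(0, 0))
#   dm.commit()  # flushes the pending Kafka messages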
        'bootstrap.servers': "kafka:2181",
        'group.id': "json_producer"
    })
    time.sleep(10)

    def delivery_callback (err, msg):
        if err:
            sys.stderr.write('%% Message failed delivery: %s\n' % err)
        else:
            sys.stderr.write('%% Message delivered to %s [%d]\n' % \
                             (msg.topic(), msg.partition()))

    for tweet in get_tweet('examples/tweets-200k.txt.gz'):
        # if len(tweet['entities']['urls']) > 0 and \
        #         any(tweet['lang'] in l for l in ['es', 'en']):
        try:
            print("%s: %s" % (tweet['user']['screen_name'], tweet['text']))
            kfk.produce(
                "raw_tweets",
                json.dumps(tweet),
                callback=delivery_callback
            )
            kfk.poll(0)
            kfk.flush()
        except BufferError as e:
            sys.stderr.write('%% Local producer queue is full ' \
                             '(%d messages awaiting delivery): try again\n' %
                             len(kfk))


Esempio n. 25
0
class KafkaProducer:
    def __init__(self, logger, cfg, influxdb_client, email_notification):
        """Конструктор класса

        Args:
            logger (TimedRotatingLogger): логер
            cfg (dict): словарь параметров
            influxdb_client (InfluxBDProducer): объект для логирования в базу InfluxDB
            email_notification (EmailNotification): объект для отправки email уведомлений

        """
        self.logger = logger
        self.cfg = cfg
        self.influxdb_client = influxdb_client
        self.email_notification = email_notification
        self.producer = Producer(self.cfg['kafka_broker']['producer_config'])

    @staticmethod
    def delivery_callback(err, msg):
        if err:
            raise KafkaException(err)
        else:
            pass
            # sys.stderr.write('Message delivered to {0} [{1}] @ {2}\n'.
            #                  format(msg.topic(), msg.partition(), msg.offset()))

    def write_message(self, topic, key, message, headers):
        """Запись одного сообщения в очередь Kafka

        Args:
            topic (str): имя очереди для записи сообщения
            key (str): id сообщения (message.id из Traffic)
            message (str): разобранная сделка с атрибутами и значениями в формате словаря
            headers (dict): заголовки сообщения

        """
        try:
            self.producer.produce(topic=topic,
                                  key=key,
                                  value=message,
                                  headers=headers,
                                  callback=self.delivery_callback)
            # synchronous write of messages to Kafka
            self.producer.flush()
            return True
        except BufferError as be:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.logger.error(
                "Local producer queue is full ({0} messages awaiting delivery): try again\n{1}\n{2}"
                .format(len(self.producer), be,
                        traceback.extract_tb(exc_traceback)))
            self.influxdb_client.write_error(module="KAFKA_PRODUCER")
            return False
        except KafkaException as ke:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.logger.error(
                "Error occurred while writing message into Kafka\n{0}\n{1}\n{2}"
                .format(ke, message, traceback.extract_tb(exc_traceback)))
            self.influxdb_client.write_error(module="KAFKA_PRODUCER")
            self.email_notification.send_error_notification()
            sys.exit(1)
        except TypeError as te:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.logger.error(
                "Error occurred while writing message into Kafka\n{0}\n{1}\n{2}"
                .format(te, message, traceback.extract_tb(exc_traceback)))
            self.influxdb_client.write_error(module="KAFKA_PRODUCER")
            self.email_notification.send_error_notification()
            sys.exit(1)
Esempio n. 26
0
def create_app(config=None, testing=False, cli=True):
    """
    Application factory, used to create application
    """
    app = Flask(__name__, static_folder=None)
    app.port = 5003

    # @app.route("/profanity")
    # def profanity(self):
    #     content = "my message f**k" # make sure this is the actual tweet
    #     url = "https://www.purgomalum.com/service/containsprofanity?text={}".format(content)
    #     profanity = requests.get(url=url)
    #     return {"profanity": profanity.content.decode('UTF8')}

    c = Consumer(
        {
            "bootstrap.servers": "localhost:9092",
            "group.id": "content_curator_twitter_group_21",
            "auto.offset.reset": "earliest",
        }
    )

    p = Producer({"bootstrap.servers": "localhost:9092"})

    c.subscribe(["content_curator_twitter"])

    while True:
        msg = c.poll()

        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue

        # print('Received message: {}'.format(msg.value().decode('utf-8')))
        try:
            m = json.loads(msg.value().decode("utf-8"))
            if "content" in m.keys():
                content = m["content"]
                url = "https://www.purgomalum.com/service/containsprofanity?text={}".format(
                    content
                )
                profanity = requests.get(url=url)
                profanity_value = json.dumps(
                    {"profanity": profanity.content.decode("utf-8")}
                )
                msg_key = msg.key().decode("utf-8")

                if msg_key is not None:
                    p.produce(
                        topic="content_curator_twitter",
                        key=msg_key,
                        value=profanity_value,
                    )
                    p.flush()
                    print("ADDED:", {"key": msg_key, "value": profanity_value})
        except Exception as e:
            print("ERROR:", e)

    c.close()

    return app
Esempio n. 27
0
        print()


p = Producer({'bootstrap.servers': '127.0.0.1:9092'})

try:
    for val in range(0, 10):
        topic = "first_topic"
        value = "hello from python #{}".format(val)
        key = "key_{}".format(val)
        p.produce(topic=topic, key=key, value=value, callback=acked)
        p.poll(0.5)

        # Every Key goes to some partition if you rerun the code
        # key_0  part 2
        # key_1  part 0
        # key_2  part 1
        # key_3  part 2
        # key_4  part 1
        # key_5  part 2
        # key_6  part 0
        # key_7  part 0
        # key_8  part 1
        # key_9  part 0

except KeyboardInterrupt:
    pass

p.produce('first_topic', key=None, value='first from python')
p.flush(10)
Esempio n. 28
0
class KafkaProducer(Producer[TPayload]):
    def __init__(self, configuration: Mapping[str, Any],
                 codec: Codec[KafkaPayload, TPayload]) -> None:
        self.__configuration = configuration
        self.__codec = codec

        self.__producer = ConfluentProducer(configuration)
        self.__shutdown_requested = Event()

        # The worker must execute in a separate thread to ensure that callbacks
        # are fired -- otherwise trying to produce "synchronously" via
        # ``produce(...).result()`` could result in a deadlock.
        self.__result = execute(self.__worker)

    def __worker(self) -> None:
        """
        Continuously polls the producer to ensure that delivery callbacks are
        triggered (which correspondingly set the result values on the
        ``Future`` instances returned by ``produce``.) This function exits
        after a shutdown request has been issued (via ``close``) and all
        in-flight messages have been delivered.
        """
        while not self.__shutdown_requested.is_set():
            self.__producer.poll(0.1)
        self.__producer.flush()

    def __delivery_callback(
        self,
        future: Future[Message[TPayload]],
        payload: TPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        if error is not None:
            future.set_exception(TransportError(error))
        else:
            try:
                timestamp_type, timestamp_value = message.timestamp()
                if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                    raise ValueError("timestamp not available")

                future.set_result(
                    Message(
                        Partition(Topic(message.topic()), message.partition()),
                        message.offset(),
                        payload,
                        datetime.utcfromtimestamp(timestamp_value / 1000.0),
                    ))
            except Exception as error:
                future.set_exception(error)

    def produce(self, destination: Union[Topic, Partition],
                payload: TPayload) -> Future[Message[TPayload]]:
        if self.__shutdown_requested.is_set():
            raise RuntimeError("producer has been closed")

        if isinstance(destination, Topic):
            produce = partial(self.__producer.produce, topic=destination.name)
        elif isinstance(destination, Partition):
            produce = partial(
                self.__producer.produce,
                topic=destination.topic.name,
                partition=destination.index,
            )
        else:
            raise TypeError("invalid destination type")

        encoded = self.__codec.encode(payload)

        future: Future[Message[TPayload]] = Future()
        future.set_running_or_notify_cancel()
        produce(
            value=encoded.value,
            key=encoded.key,
            headers=encoded.headers,
            on_delivery=partial(self.__delivery_callback, future, payload),
        )
        return future

    def close(self) -> Future[None]:
        self.__shutdown_requested.set()
        return self.__result
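# A usage sketch for the future-based producer above (hypothetical configuration,
# codec and topic; the background __worker thread is what makes blocking on
# .result() safe here):
#
#   producer = KafkaProducer(
#       configuration={"bootstrap.servers": "localhost:9092"}, codec=my_codec
#   )
#   message = producer.produce(Topic("events"), payload).result(timeout=5.0)
#   producer.close().result()  # waits for in-flight messages to be delivered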
Esempio n. 29
0
def test_consumer_rebalance_from_committed_offset():
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             '{}'.format(i).encode('utf8'),
                             partition=i % 2)

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        Consumer({
            'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
            'group.id': consumer_group,
        }).commit(
            offsets=[
                TopicPartition(
                    message.topic(),
                    message.partition(),
                    message.offset() + 1,
                ) for message in messages_delivered[topic][:2]
            ],
            asynchronous=False,
        )

        consumer_a = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for i in xrange(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert len(assignments_received[consumer_a]
                   ) == 1, 'expected to receive partition assignment'
        assert set(
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]) == set([(topic, 0),
                                                                  (topic, 1)])

        assignments_received[consumer_a].pop()

        consumer_b = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for i in xrange(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert len(assignments_received[consumer]
                       ) == 1, 'expected to receive partition assignment'
            assert len(
                assignments_received[consumer]
                [0]) == 1, 'expected to have a single partition assignment'

            i = assignments_received[consumer][0][0]
            assignments[(i.topic, i.partition)] = consumer

        assert set(assignments.keys()) == set([(topic, 0), (topic, 1)])

        for expected_message in messages_delivered[topic][2:]:
            consumer = assignments[(expected_message.topic(),
                                    expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            producer.produce(
                commit_log_topic,
                key='{}:{}:{}'.format(
                    expected_message.topic(),
                    expected_message.partition(),
                    synchronize_commit_group,
                ).encode('utf8'),
                value='{}'.format(expected_message.offset() +
                                  1, ).encode('utf8'),
            )

            assert producer.flush(
                5) == 0, 'producer did not successfully flush queue'

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?)
            for i in xrange(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, 'no message received'

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
Esempio n. 30
0
    # Step 1. Configure the connection settings for the Kafka cluster
    props = {
        # Where is the Kafka cluster?
        'bootstrap.servers': 'localhost:9092',          # <-- replace with the Kafka cluster to connect to
        'error_cb': error_cb                            # callback function for receiving error messages
    }
    # Step 2. Create a Kafka Producer instance
    producer = Producer(props)
    # Step 3. Specify the name of the topic to publish messages to
    topicName = 'ak03.four_partition'
    msgCount = 10000
    try:
        print('Start sending messages ...')
        # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])
        for i in range(msgCount):
            producer.produce(topicName, key=str(i), value='msg_'+str(i))
            producer.poll(0)  # <-- (important) call poll() so the client checks its internal buffer
            print('key={}, value={}'.format(str(i), 'msg_' + str(i)))
            time.sleep(3)  # pause the main thread for 3 seconds

        print('Send ' + str(msgCount) + ' messages to Kafka')
    except BufferError as e:
        # error handling
        sys.stderr.write('%% Local producer queue is full ({} messages awaiting delivery): try again\n'
                         .format(len(producer)))
    except Exception as e:
        print(e)
    # Step 5. Make sure every message in the buffer has been sent to Kafka
    producer.flush(10)
    print('Message sending completed!')
Esempio n. 31
0
def test_consumer_rebalance_from_uncommitted_offset():
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             '{}'.format(i).encode('utf8'),
                             partition=i % 2)

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        for (topic, partition), offset in {
            (message.topic(), message.partition()): message.offset()
                for message in messages_delivered[topic]
        }.items():
            producer.produce(
                commit_log_topic,
                key='{}:{}:{}'.format(
                    topic,
                    partition,
                    synchronize_commit_group,
                ).encode('utf8'),
                value='{}'.format(offset + 1, ).encode('utf8'),
            )

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        consumer_a = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(consumer_a, [
            lambda message: assignments_received[consumer_a],
            collect_messages_recieved(4),
        ], 10)

        assert len(assignments_received[consumer_a]
                   ) == 1, 'expected to receive partition assignment'
        assert set(
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]) == set([(topic, 0),
                                                                  (topic, 1)])
        assignments_received[consumer_a].pop()

        message = consumer_a.poll(1)
        assert message is None or message.error().code() == KafkaError._PARTITION_EOF, \
            'there should be no more messages to receive'

        consumer_b = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(consumer_a, [
            lambda message: assignments_received[consumer_a],
        ], 10)

        consume_until_constraints_met(consumer_b, [
            lambda message: assignments_received[consumer_b],
            collect_messages_recieved(2),
        ], 10)

        for consumer in [consumer_a, consumer_b]:
            assert len(assignments_received[consumer][0]) == 1

        message = consumer_a.poll(1)
        assert message is None or message.error().code() == KafkaError._PARTITION_EOF, \
            'there should be no more messages to receive'

        message = consumer_b.poll(1)
        assert message is None or message.error().code() == KafkaError._PARTITION_EOF, \
            'there should be no more messages to receive'
Esempio n. 32
0
class KafkaProducer:
    def __init__(self,
                 kafka_env='LOCAL',
                 kafka_brokers="",
                 kafka_user="",
                 kafka_password=""):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_user = kafka_user
        self.kafka_password = kafka_password

    def prepareProducer(self, groupID="pythonproducers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID
        }
        # We need this test as local kafka does not expect SSL protocol.
        if (self.kafka_env != 'LOCAL'):
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = self.kafka_user
            options['sasl.password'] = self.kafka_password
        if (self.kafka_env == 'OCP'):
            options['sasl.mechanisms'] = 'SCRAM-SHA-512'
            options['ssl.ca.location'] = os.environ['PEM_CERT']

        # Printing out producer config for debugging purposes
        print("[KafkaConsumer] - This is the configuration for the consumer:")
        print("[KafkaConsumer] - -------------------------------------------")
        print('[KafkaConsumer] - Bootstrap Server:  {}'.format(
            options['bootstrap.servers']))
        if (self.kafka_env != 'LOCAL'):
            # Obfuscate password
            if (len(self.kafka_password) > 3):
                obfuscated_password = self.kafka_password[
                    0] + "*****" + self.kafka_password[len(self.kafka_password)
                                                       - 1]
            else:
                obfuscated_password = "******"
            print('[KafkaProducer] - Security Protocol: {}'.format(
                options['security.protocol']))
            print('[KafkaProducer] - SASL Mechanism:    {}'.format(
                options['sasl.mechanisms']))
            print('[KafkaProducer] - SASL Username:     {}'.format(
                options['sasl.username']))
            print('[KafkaProducer] - SASL Password:     {}'.format(
                obfuscated_password))
            if (self.kafka_env == 'OCP'):
                print('[KafkaProducer] - SSL CA Location:   {}'.format(
                    options['ssl.ca.location']))
        print("[KafkaProducer] - -------------------------------------------")

        # Creating the producer
        self.producer = Producer(options)

    def delivery_report(self, err, msg):
        # Called once for each message produced to indicate delivery result. Triggered by poll() or flush().
        if err is not None:
            print('[ERROR] - [KafkaProducer] - Message delivery failed: {}'.
                  format(err))
        else:
            print('[KafkaProducer] - Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    def publishEvent(self, topicName, eventToSend, keyName):
        dataStr = json.dumps(eventToSend)
        self.producer.produce(topicName,
                              key=eventToSend[keyName],
                              value=dataStr.encode('utf-8'),
                              callback=self.delivery_report)
        self.producer.flush()
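# A usage sketch for the wrapper above (hypothetical broker, topic and event):
#
#   kp = KafkaProducer(kafka_env='LOCAL', kafka_brokers='localhost:9092')
#   kp.prepareProducer(groupID='orders-producer')
#   kp.publishEvent('orders', {'orderID': 'o-1', 'status': 'created'}, 'orderID')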
Esempio n. 33
0
class KafkaConnector(object):
    """Simple wrapper class to configure a simple kafka consumer
    and producer pair, so that they can be used to perform simple
    filter() and map() operations over the received tweets"""

    def __init__(
        self,
        group_id=None,
        consumer_topic='consumer_limbo',
        producer_topic='consumer_limbo',
        logging_topic='minteressa_stats',
        bootstrap_servers='kafka:9092'
    ):

        self.group_id = group_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.logging_topic = logging_topic

        self.consumer = None
        self.producer = None

    def listen(self):
        while True:
            msg = self.consumer.poll()
            if msg is None:
                continue
            if msg.error():
                # Error or event
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' % (
                            msg.topic(),
                            msg.partition(),
                            msg.offset()
                        )
                    )
                elif msg.error():
                    # Error
                    raise KafkaException(msg.error())
            else:
                # Proper message
                sys.stdout.write(
                    '%s [partition-%d] at offset %d with key %s:\n' %
                    (
                        msg.topic(),
                        msg.partition(),
                        msg.offset(),
                        str(msg.key())
                    )
                )
                yield msg

    def connect(self):
        self.consumer = Consumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        })
        print("subscribing to %s" % self.consumer_topic)
        self.consumer.subscribe([
            self.consumer_topic
        ])
        print("Subscribed to topic %s " % self.consumer_topic)

        self.producer = Producer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id
        })

    def send(self, message, producer_topic=None):
        producer_topic = producer_topic \
            if producer_topic is not None \
            else self.producer_topic

        self.producer.produce(
            producer_topic,
            message
        )
        # self.producer.flush()


    def log(self, message, logging_topic=None):
        logging_topic = logging_topic \
            if logging_topic is not None \
            else self.logging_topic

        self.producer.produce(logging_topic, message)
        self.producer.flush()

    def close(self):
        self.consumer.close()
        # confluent_kafka's Producer has no close(); flush pending messages instead.
        self.producer.flush()
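# A sketch of the filter()/map() style use described in the class docstring
# (hypothetical topics and predicate; connect() must be called before listen()):
#
#   connector = KafkaConnector(group_id='filter-worker',
#                              consumer_topic='raw_tweets',
#                              producer_topic='filtered_tweets')
#   connector.connect()
#   for msg in connector.listen():
#       tweet = json.loads(msg.value().decode('utf-8'))
#       if tweet.get('lang') == 'en':          # filter
#           connector.send(json.dumps(tweet))  # map / forward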
Esempio n. 34
0
subject = schema.fullname  # == "my.test.value"

# io.confluent.kafka.serializers.subject.TopicRecordNameStrategy:
# The subject name is <topic>-<type>, where <topic> is the Kafka topic name, and <type> is the fully-qualified
# name of the Avro record type of the message. This setting also allows any number of event types in the same topic,
# and further constrains the compatibility check to the current topic only.

# subject = topic + '-' + schema.fullname # == "avro-python-producer-topic-my.test.value"

# get registered schema id from the schema_registry
schema_id = schema_registry.register(subject, schema)

for i in range(5):
    key = "key-" + str(i)
    value = "value-" + str(i)
    record_value = avro_serde.encode_record_with_schema_id(
        schema_id=schema_id,
        record={
            "name": value,
            "type": "avro"
        },
        is_key=False,
    )
    producer.produce(topic, key=key.encode('utf-8'), value=record_value)
    print("Produced:", key, record_value)

producer.flush()

print("End: avro-python-producer")
Esempio n. 35
0
class KafkaDestination(object):
    """ syslog-ng Apache Kafka destination.
    """

    _kafka_producer = None

    _conf = dict()

    def __init__(self):
        self.hosts = None
        self.topic = None
        self.msg_key = None
        self.partition = None
        self.programs = None
        self.group_id = None
        self.broker_version = None
        self.verbose = False
        self.display_stats = False
        self.producer_config = None

    def init(self, args):
        """ This method is called at initialization time.

        Should return False if initialization fails.
        """

        if 'producer_config' in args:
            try:
                self.producer_config = ast.literal_eval(args['producer_config'])
                self._conf.update(self.producer_config)
            except ValueError:
                LOG.error("Given config %s is not in a Python dict format."
                          % args['producer_config'])

        try:
            self.hosts = args['hosts']
            self.topic = args['topic']
            self._conf['bootstrap.servers'] = self.hosts
        except KeyError:
            LOG.error("Missing `hosts` or `topic` option...")
            return False

        if 'msg_key' in args:
            self.msg_key = args['msg_key']
            LOG.info("Message key used will be %s" % self.msg_key)

        if 'partition' in args:
            self.partition = args['partition']
            LOG.info("Partition to produce to %s" % self.partition)

        # optional `programs` parameter to filter out messages
        if 'programs' in args:
            self.programs = parse_str_list(args['programs'])
            LOG.info("Programs to filter against %s" % self.programs)

        if 'group_id' in args:
            self.group_id = args['group_id']
            self._conf['group.id'] = self.group_id
            LOG.info("Broker group_id=%s" % self.group_id)

        if 'broker_version' in args:
            self.broker_version = args['broker_version']
            if '.'.join(self.broker_version.split('.')[:2]) in ('0.10', '0.11'):
                self._conf['api.version.request'] = True
            else:
                self._conf['broker.version.fallback'] = self.broker_version
                self._conf['api.version.request'] = False
            LOG.info("Broker version=%s" % self.broker_version)
        else:
            self.broker_version = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf[
                'broker.version.fallback'] = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['api.version.request'] = False
            LOG.warn("Default broker version fallback %s "
                     "will be applied here." % DEFAULT_BROKER_VERSION_FALLBACK)

        self._conf['on_delivery'] = delivery_callback
        if 'verbose' in args:
            # provide a global `on_delivery` callback in the `Producer()` config
            # dict better for memory consumptions vs per message callback.
            self.verbose = ast.literal_eval(args['verbose'])
        if not self.verbose:
            # only interested in delivery failures here. We do provide a
            # global on_delivery callback in the Producer() config dict and
            # also set delivery.report.only.error.
            self._conf['delivery.report.only.error'] = True
            LOG.info("Verbose mode is OFF: you will not be able to see "
                     "messages in here. Failures only. Use 'verbose=('True')' "
                     "in your destination options to see successfully "
                     "processed messages in your logs.")

        # display broker stats?
        if 'display_stats' in args:
            self.display_stats = ast.literal_eval(args['display_stats'])
        if self.display_stats:
            self._conf['stats_cb'] = stats_callback
            LOG.info("Broker statistics will be displayed.")

        LOG.info(
            "Initialization of Kafka Python driver w/ args=%s" % self._conf)
        return True

    def open(self):
        """ Open a connection to the Kafka service.

        Should return False if initialization fails.
        """
        LOG.info("Opening connection to the remote Kafka services at %s"
                 % self.hosts)
        self._kafka_producer = Producer(**self._conf)
        return True

    def is_opened(self):
        """ Check if the connection to Kafka is able to receive messages.

        Should return False if target is not open.
        """
        return self._kafka_producer is not None

    def close(self):
        """ Close the connection to the Kafka service.
        """
        LOG.debug("KafkaDestination.close()....")
        if self._kafka_producer is not None:
            LOG.debug("Flushing producer w/ a timeout of 30 seconds...")
            self._kafka_producer.flush(30)
        return True

    # noinspection PyMethodMayBeStatic
    def deinit(self):
        """ This method is called at deinitialization time.
        """
        LOG.debug("KafkaDestination.deinit()....")
        if self._kafka_producer:
            self._kafka_producer = None
        return True

    def send(self, ro_msg):
        """ Send a message to the target service

        It should return True to indicate success, False will suspend the
        destination for a period specified by the time-reopen() option.

        :return: True or False
        """

        # do nothing if msg is empty
        if not ro_msg:
            return True

        # no syslog-ng `values-pair` here we dealing with `LogMessage`
        if type(ro_msg) != dict:
            # syslog-ng `LogMessage` is read-only
            # goal is rfc5424 we cannot use values-pair because of memory leaks
            try:
                msg = {'FACILITY': ro_msg.FACILITY, 'PRIORITY': ro_msg.PRIORITY,
                       'HOST': ro_msg.HOST, 'PROGRAM': ro_msg.PROGRAM,
                       'DATE': ro_msg.DATE, 'MESSAGE': ro_msg.MESSAGE}
            except AttributeError:
                LOG.error("Your version of syslog-ng is not supported. "
                          "Please use syslog-ng 3.7.x")
                return False
        else:
            LOG.warn("You are using `values-pair` if you are using "
                     "syslog-ng <= 3.11 it is known to be leaking...")
            msg = ro_msg
        try:

            # check if we do have a program filter defined.
            msg_program = msg['PROGRAM']
            if self.programs is not None:
                if msg_program not in self.programs:
                    # notify of success
                    return True
            if msg_program == 'firewall':
                firewall_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_firewall_msg(firewall_msg)
            elif msg_program == 'nat':
                nat_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_nat_msg(nat_msg)
            # convert date string to UNIX timestamp
            msg_date = msg['DATE']
            if msg_date is not None:
                msg['DATE'] = date_str_to_timestamp(msg_date)

            msg_string = str(msg)

            kwargs = {}
            if self.msg_key and self.msg_key in msg.keys():
                kwargs['key'] = msg[self.msg_key]
            if self.partition:
                try:
                    kwargs['partition'] = int(self.partition)
                except ValueError:
                    LOG.warning(
                        "Ignore partition=%s because it is not an int."
                        % self.partition)

            self._kafka_producer.produce(self.topic, msg_string, **kwargs)

            # `poll()` doesn't do any sleeping at all if you give it 0, all
            # it does is grab a mutex, check a queue, and release the mutex.
            # It is okay to call poll(0) after each produce call, the
            # performance impact is negligible, if any.
            self._kafka_producer.poll(0)
        except BufferError:
            LOG.error("Producer queue is full. This message will be discarded. "
                      "%d messages waiting to be delivered.",
                      len(self._kafka_producer))
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            sleep(5)
            return True
        except (KafkaException, UnicodeEncodeError) as e:
            LOG.error("An error occurred while trying to send messages...   "
                      "See details: %s" % e, exc_info=True)
            sleep(5)
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            return True

        return True
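# The module-level `delivery_callback` and `stats_callback` wired into the config in
# init() above are not shown in this snippet; minimal sketches (hypothetical bodies)
# consistent with how they are registered:
#
#   def delivery_callback(err, msg):
#       # With delivery.report.only.error=True this fires for failed deliveries only.
#       if err is not None:
#           LOG.error("Delivery to %s failed: %s", msg.topic(), err)
#
#   def stats_callback(stats_json_str):
#       # Raw librdkafka statistics as a JSON string (requires statistics.interval.ms).
#       LOG.info("Kafka stats: %s", stats_json_str)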
Esempio n. 36
0
def test_consumer_rebalance_from_committed_offset(requires_kafka):
    consumer_group = f"consumer-{uuid.uuid1().hex}"
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic(partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic, f"{i}".encode(), partition=i % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        Consumer(
            {"bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"], "group.id": consumer_group}
        ).commit(
            offsets=[
                TopicPartition(message.topic(), message.partition(), message.offset() + 1)
                for message in messages_delivered[topic][:2]
            ],
            asynchronous=False,
        )

        consumer_a = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for i in range(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert (
            len(assignments_received[consumer_a]) == 1
        ), "expected to receive partition assignment"
        assert {(i.topic, i.partition) for i in assignments_received[consumer_a][0]} == {
            (topic, 0),
            (topic, 1),
        }

        assignments_received[consumer_a].pop()

        consumer_b = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for i in range(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert (
                len(assignments_received[consumer]) == 1
            ), "expected to receive partition assignment"
            assert (
                len(assignments_received[consumer][0]) == 1
            ), "expected to have a single partition assignment"

            i = assignments_received[consumer][0][0]
            assignments[(i.topic, i.partition)] = consumer

        assert set(assignments.keys()) == {(topic, 0), (topic, 1)}

        for expected_message in messages_delivered[topic][2:]:
            consumer = assignments[(expected_message.topic(), expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            producer.produce(
                commit_log_topic,
                key=f"{expected_message.topic()}:{expected_message.partition()}:{synchronize_commit_group}".encode(),
                value=f"{expected_message.offset() + 1}".encode(),
            )

            assert producer.flush(5) == 0, "producer did not successfully flush queue"

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?)
            for i in range(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, "no message received"

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
Esempio n. 37
0
class ConfluentKafkaMsgQAPI:
    """
    This class provides API's into interact with Kafka Queue.
    """
    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
            pass
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        # if is_producer:
        #    self.__producer_connect()
        # if is_consumer:
        #    self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or \
                self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the configured Kafka broker.
        :return:
        """
        is_connected = False
        if self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
                is_connected = True
            except Exception:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))
        return is_connected

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False

        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one.."
            )
            if not self.__producer_connect():
                logging_to_console_and_syslog(
                    "Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = kafka_admin_client.create_topics(new_topics=[
                    admin.NewTopic(topic=self.topic, num_partitions=1)
                ],
                                                       operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() above, or flush() below, when the message has
        # been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except Exception:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename,
                        self.broker_name,
                        self.topic)
            logging_to_console_and_syslog(event)
            # Wait for any outstanding messages to be delivered and delivery report
            # callbacks to be triggered.
            # Serve delivery callback queue.
            # NOTE: Since produce() is an asynchronous API this poll() call
            #       will most likely not serve the delivery callback for the
            #       last produce()d message.
            self.producer_instance.poll(timeout=0.1)
            # Wait until all messages have been delivered
            # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
            self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        is_connected = False

        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
            self.consumer_conf = {'bootstrap.servers': self.broker_name,
                              'group.id': 'kafka-consumer',
                              'session.timeout.ms': 6000,
                              'auto.offset.reset': 'earliest'}
        """
        if self.consumer_instance is None:
            try:

                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                consumer_conf = {
                    'bootstrap.servers': self.broker_name,
                    'group.id': self.topic,
                    'session.timeout.ms': 6000,
                    'auto.offset.reset': 'earliest'
                }

                # consumer_conf['stats_cb'] = stats_cb
                # consumer_conf['statistics.interval.ms'] = 0
                self.consumer_instance = Consumer(consumer_conf)
                is_connected = True
            except Exception:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))
        return is_connected

    @staticmethod
    def print_assignment(consumer, partitions):
        print('consumer = {}, Assignment {}:'.format(consumer, partitions))

    def dequeue(self):
        conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': self.topic,
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        if not self.consumer_instance:
            self.consumer_instance = Consumer(conf)
            self.consumer_instance.subscribe(
                [self.topic], on_assign=ConfluentKafkaMsgQAPI.print_assignment)
        msg = self.consumer_instance.poll(timeout=1.0)
        if msg is None or msg.error():
            return None
        else:
            logging_to_console_and_syslog(
                '%% %s [%d] at offset %d with key %s:\n' %
                (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            msg = msg.value().decode('utf8')
            logging_to_console_and_syslog("msg.value()={}".format(msg))
            self.consumer_instance.close()
            self.consumer_instance = None
            return msg

    def cleanup(self):
        if self.consumer_instance:
            self.consumer_instance.close()
            self.consumer_instance = None
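
# Usage sketch (an assumption, not part of the original class): with the
# broker_name_key and topic_key environment variables exported, the class can
# be driven as a simple filename queue. The filename below is hypothetical.
if __name__ == '__main__':
    producer_api = ConfluentKafkaMsgQAPI(is_producer=True)
    producer_api.enqueue("/tmp/example_file.bin")

    consumer_api = ConfluentKafkaMsgQAPI(is_consumer=True,
                                         thread_identifier="demo")
    print(consumer_api.dequeue())
    consumer_api.cleanup()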
Esempio n. 38
0
def test_consumer_start_from_partition_start(requires_kafka):
    synchronize_commit_group = f"consumer-{uuid.uuid1().hex}"

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer(
        {
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "on_delivery": record_message_delivered,
        }
    )

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, f"{i}".encode())

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=f"consumer-{uuid.uuid1().hex}",
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in range(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received) == 1, "expected to receive partition assignment"
        assert {(i.topic, i.partition) for i in assignments_received[0]} == {(topic, 0)}

        # TODO: Make sure that all partitions remain paused.

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key=f"{message.topic()}:{message.partition()}:{synchronize_commit_group}".encode(),
            value=f"{message.offset() + 1}".encode(),
        )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?
        for i in range(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = messages_delivered[topic][0]
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
Esempio n. 39
0
class BMPNodes(object):
    def __init__(self, bootstrap_server=None, redishost=None):
        self.nodes = {}
        if redishost is None:
            raise ValueError("Redis Hostname not specified, bailing out")
        else:
            self.redis = redis.StrictRedis(host=redishost)
            self.redis.flushall()
            self.pubsub = self.redis.pubsub()

        self.routerevent = threading.Event()
        self.peerevent = threading.Event()
        self.threadList = []
        self.poisonpillq = Queue.Queue()
        self.peer_consumer = None
        self.router_consumer = None
        self.prefix_consumer = None
        self.rib_producer = None

        if bootstrap_server is not None:
            self.bootstrap_server = bootstrap_server

            for fn in [
                    self.capture_router_msg, self.capture_peer_msg,
                    self.capture_prefix_msg, self.redis_listener
            ]:
                thread = threading.Thread(target=fn, args=())
                self.threadList.append(thread)
                thread.daemon = True  # Daemonize thread
                thread.start()  # Start the execution
        else:
            raise ValueError("Bootstrap server not specified")

        self.dispatch = {'init': self.add_router, 'term': self.delete_router}

        self.redis_dispatch = {
            'AdjInRib': self.adjRibPolicyWorker,
            'AdjInRibPP': self.localRibWorker,
            'localRib': self.kafkaWorker
        }

    def get_nodes(self):
        nodeset = {}
        for node in self.nodes.keys():
            rtr = self.nodes[node]
            nodeset.update({str(rtr.name) + ':' + str(rtr.ipaddr): node})
            # Also provide the reverse mapping
            nodeset.update({node: str(rtr.name) + ':' + str(rtr.ipaddr)})
        return nodeset

    def serialize(self):
        nodeset = {}
        for node in self.nodes.keys():
            nodeset.update({node: self.nodes[node].serialize()})

        return nodeset

    class PoisonPillException(Exception):
        pass

    def consumer_cleanup(self):
        logger.debug("Cleaning up, exiting the active threads")
        for thread in self.threadList:
            self.poisonpillq.put("quit")

        # The redis listener will need the poisonpill channel publish
        self.redis.publish('poisonpill', "quit")

        for thread in self.threadList:
            logger.debug("Waiting for %s to finish..." % (thread.name))
            thread.join()
        return

    def process_msg(self, router_msg):
        # Ignore the first message (action = first)
        for msg in router_msg:
            if str(msg['action']) != 'first':
                self.dispatch[str(msg['action'])](msg)
            else:
                logger.debug("Ignoring action=first in openbmp router message")

    def add_router(self, router_msg):
        if str(router_msg['hash']) not in self.nodes:
            # Create the router object
            node = Node(node_hash=router_msg['hash'],
                        name=router_msg.pop('name'),
                        ipaddr=router_msg.pop('ip_address'),
                        data=router_msg)

            # Add to existing router set
            self.nodes.update({str(router_msg['hash']): node})

        else:
            logger.debug(
                "Received an add event for an existing peer. Strange, but ignore"
            )

    def delete_router(self, router_msg):
        if str(router_msg['hash']) in self.nodes:
            # Delete the particular router from the current router set
            del self.nodes[str(router_msg['hash'])]

            # Delete the router hash from redis
            self.redis.delete(str(router_msg['hash']))
        else:
            logger.debug(
                "Received a del event for a non-existent peer, ignore")

    def update_redis(self, channel=None):
        # Called to reflect latest state when new messages are received.
        nodes = {}
        if self.get_nodes():
            self.redis.hmset("routers", self.get_nodes())
            for node in self.nodes.keys():
                self.redis.hmset(node, self.nodes[node].serialize())

        if channel:
            # Publish message to redis Listeners
            self.redis.publish(
                channel, "Publish to " +
                str(self.redis_dispatch[channel].__name__) + " worker")

    def redis_listener(self):
        self.pubsub.subscribe(
            ['AdjInRib', 'AdjInRibPP', 'localRib', 'poisonpill'])
        pill = ''
        try:
            while True:
                for item in self.pubsub.listen():
                    logger.info("Received Redis event")
                    if item['data'] == "quit":
                        self.pubsub.unsubscribe()
                        logger.debug(
                            "unsubscribed and finished redis pubsub listener")
                        raise self.PoisonPillException
                    else:
                        if item['channel'] in self.redis_dispatch:
                            self.redis_dispatch[item['channel']]()

        except self.PoisonPillException:
            return

        except Exception as e:
            logger.debug("Error while listening to redis events")
            logger.debug("Error is" + str(e))
            return

    def adjRibPolicyWorker(self):
        logger.debug("Received an AdjInRib event")
        # walk through the nodes and apply available policies
        #nodes = {}
        if self.get_nodes():
            for node in self.nodes.keys():
                # process and apply policies
                self.nodes[node].adjInRibPP.process_adjInRib(node, self.redis)

        self.update_redis('AdjInRibPP')

    def localRibWorker(self):
        # walk through the nodes and apply available path selection algorithms
        #nodes = {}
        if self.get_nodes():
            for node in self.nodes.keys():
                # process and do path selection
                self.nodes[node].localRib.process_adjInRibPP(node, self.redis)

        self.update_redis('localRib')

    # Optional per-message delivery callback (triggered by poll() or flush())
    # during the rib stream to kafka when a message has been successfully delivered
    # or permanently failed delivery (after retries).

    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logger.debug('%% Message failed delivery: %s\n' % err)
        else:
            logger.debug('%% Message delivered to %s [%d]\n' %
                         (msg.topic(), msg.partition()))

    def kafkaWorker(self):
        # With the local Rib ready, push routes to Kafka. This is meant to
        # serve as a streaming set of routes to router clients which will be
        # kafka consumers. This is NOT a way to resync if the router dies or
        # router client disconnects - for that sync with the redis database
        # first and then start listening to fresh messages from Kafka for route events.

        self.rib_producer = Producer(
            {'bootstrap.servers': self.bootstrap_server})

        if self.get_nodes():
            for node in self.nodes.keys():

                topic = self.nodes[node].hash

                # fetch localRib routes from Redis, push to Kafka bus
                localRib = ast.literal_eval(self.redis.hget(node, 'localRib'))
                if localRib:
                    for route in localRib:
                        logger.debug(route)
                        #   self.shuttler.rtQueue.put(route)
                        try:
                            self.rib_producer.produce(
                                topic,
                                value=json.dumps(route),
                                callback=self.delivery_callback)
                            self.rib_producer.poll(0)
                        except BufferError as e:
                            logger.debug(
                                '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                                % len(self.rib_producer))
                            # Call poll() first to block until there is queue space available.
                            # This blocks for RIB_PRODUCER_WAIT_INTERVAL seconds because message
                            # delivery can take some time if there are temporary errors on the
                            # broker (e.g., leader failover).
                            self.rib_producer.poll(RIB_PRODUCER_WAIT_INTERVAL *
                                                   1000)

                            # Now try again when there is hopefully some free space on the queue
                            self.rib_producer.produce(
                                topic,
                                value=json.dumps(route),
                                callback=self.delivery_callback)

                    # Wait until all messages have been delivered
                    logger.debug('%% Waiting for %d deliveries\n' %
                                 len(self.rib_producer))
                    self.rib_producer.flush()
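
    # Sketch (an addition, not in the original class): the BufferError handling
    # in kafkaWorker() can be factored into a small helper that serves the
    # delivery queue until space frees up and then retries the produce once.
    @staticmethod
    def produce_with_retry(producer, topic, value, callback, wait_seconds=1.0):
        try:
            producer.produce(topic, value=value, callback=callback)
        except BufferError:
            # Queue full: poll() serves delivery callbacks, freeing local queue
            # space, then the produce is retried once.
            producer.poll(wait_seconds)
            producer.produce(topic, value=value, callback=callback)
        # Serve any delivery callbacks that are already ready.
        producer.poll(0)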

    def capture_router_msg(self):
        pill = ''
        topics = ['openbmp.parsed.router']
        logger.debug("Connecting to Kafka to receive router messages")
        self.router_consumer = Consumer({
            'bootstrap.servers':
            self.bootstrap_server,
            'group.id':
            'bmp_client' + str(time.time()),
            'client.id':
            'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

        self.router_consumer.subscribe(topics)

        try:
            while True:
                msg = self.router_consumer.poll(timeout=1.0)

                try:
                    pill = self.poisonpillq.get_nowait()
                except Queue.Empty:
                    pass

                if isinstance(pill, str) and pill == "quit":
                    raise self.PoisonPillException

                if msg is None:
                    self.routerevent.set()
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logger.debug(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    # Process the  message
                    m = Message(msg.value())  # Gets body of kafka message.
                    t = msg.topic()  # Gets topic of kafka message.
                    m_tag = t.split('.')[2].upper()
                    t_stamp = str(datetime.datetime.now())

                    if t == "openbmp.parsed.router":
                        router = Router(m)
                        logger.debug('Received Message (' + t_stamp + ') : ' +
                                     m_tag + '(V: ' + str(m.version) + ')')
                        logger.debug(router.to_json_pretty())
                        router_msg = yaml.safe_load(router.to_json_pretty())
                        logger.debug("Calling process msg for Router messages")
                        bmpnodes.process_msg(router_msg)
                        # update redis
                        self.update_redis()
                        self.routerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the router message consumer")
            self.router_consumer.close()
            return

        except Exception as e:
            logger.debug(
                "Exception occurred while listening for router messages")
            logger.debug("Error is " + str(e))
            self.router_consumer.close()
            return

    def capture_peer_msg(self):

        pill = ''
        topics = ['openbmp.parsed.peer']
        logger.info("Connecting to Kafka to receive peer messages")
        self.peer_consumer = Consumer({
            'bootstrap.servers':
            self.bootstrap_server,
            'group.id':
            'bmp_client' + str(time.time()),
            'client.id':
            'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

        self.peer_consumer.subscribe(topics)

        try:
            while True:
                msg = self.peer_consumer.poll(timeout=1.0)

                try:
                    pill = self.poisonpillq.get_nowait()
                except Queue.Empty:
                    pass

                if isinstance(pill, str) and pill == "quit":
                    raise self.PoisonPillException

                if msg is None:
                    self.peerevent.set()
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logger.debug(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    # Process the  message
                    m = Message(msg.value())  # Gets body of kafka message.
                    t = msg.topic()  # Gets topic of kafka message.
                    m_tag = t.split('.')[2].upper()
                    t_stamp = str(datetime.datetime.now())

                    if t == "openbmp.parsed.peer":
                        peer = Peer(m)
                        logger.debug('Received Message (' + t_stamp + ') : ' +
                                     m_tag + '(V: ' + str(m.version) + ')')
                        logger.debug(peer.to_json_pretty())
                        peer_msg = yaml.safe_load(peer.to_json_pretty())
                        for msg in peer_msg:
                            processed = False
                            while not processed:
                                if str(msg['router_hash']) in self.nodes:
                                    self.nodes[str(
                                        msg['router_hash'])].process_msg(msg)
                                    processed = True
                                else:
                                    logger.debug(
                                        "Received peer message for currently unknown Router, hash="
                                        + str(msg['router_hash']))
                                    logger.debug(
                                        "Let's wait for router_msg event to be set"
                                    )
                                    self.routerevent.wait(
                                        PEER_MSG_DAMPENING_TIMER)

                        # Go ahead and update Redis
                        self.update_redis()
                        self.peerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the peer message consumer")
            self.peer_consumer.close()
            return

        except Exception as e:
            logger.debug(
                "Exception occurred while listening to peer messages from Kafka"
            )
            logger.debug("Error is " + str(e))
            self.peer_consumer.close()
            return

    def capture_prefix_msg(self):
        pill = ''
        topics = ['openbmp.parsed.unicast_prefix']
        logger.debug("Connecting to Kafka to receive prefix messages")
        self.prefix_consumer = Consumer({
            'bootstrap.servers':
            self.bootstrap_server,
            'group.id':
            'bmp_client' + str(time.time()),
            'client.id':
            'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

        self.prefix_consumer.subscribe(topics)

        try:
            while True:
                msg = self.prefix_consumer.poll(timeout=1.0)

                try:
                    pill = self.poisonpillq.get_nowait()
                except Queue.Empty:
                    pass

                if isinstance(pill, str) and pill == "quit":
                    raise self.PoisonPillException

                if msg is None:
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logger.debug(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    # Process the  message
                    m = Message(msg.value())  # Gets body of kafka message.
                    t = msg.topic()  # Gets topic of kafka message.
                    m_tag = t.split('.')[2].upper()
                    t_stamp = str(datetime.datetime.now())

                    if t == "openbmp.parsed.unicast_prefix":
                        unicast_prefix = UnicastPrefix(m)
                        logger.debug('Received Message (' + t_stamp + ') : ' +
                                     m_tag + '(V: ' + str(m.version) + ')')
                        logger.debug(unicast_prefix.to_json_pretty())
                        prefix_msg = yaml.safe_load(
                            unicast_prefix.to_json_pretty())

                        for msg in prefix_msg:
                            processed = False
                            while not processed:
                                if str(msg['router_hash']) in self.nodes:
                                    self.nodes[str(msg['router_hash']
                                                   )].adjInRib.process_msg(msg)
                                    processed = True
                                else:
                                    logger.debug(
                                        "Received prefix message for currently unknown Router, hash="
                                        + str(msg['router_hash']))
                                    logger.debug(
                                        "Let's wait for the peer event to be set"
                                    )
                                    self.peerevent.wait(
                                        PREFIX_MSG_DAMPENING_TIMER)

                        # Go ahead and update Redis
                        self.update_redis('AdjInRib')

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the prefix message consumer")
            self.prefix_consumer.close()
            return

        except Exception as e:
            logger.debug(
                "Exception occurred while listening for prefix messages")
            logger.debug("Error is " + str(e))
            self.prefix_consumer.close()
            return
# The code below is the tail of a standalone stdin-to-Kafka producer example
# and is not part of the BMPNodes class above. The broker/topic/Producer setup
# under the __main__ guard is an assumption filled in so the fragment is
# runnable; the delivery callback and produce loop are from the original.
if __name__ == '__main__':
    broker = sys.argv[1]  # assumed: broker address passed on the command line
    topic = sys.argv[2]   # assumed: target topic passed on the command line

    # Create Producer instance
    p = Producer({'bootstrap.servers': broker})

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    def delivery_callback(err, msg):
        if err:
            sys.stderr.write('%% Message failed delivery: %s\n' % err)
        else:
            sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' %
                             (msg.topic(), msg.partition(), msg.offset()))

    # Read lines from stdin, produce each line to Kafka
    for line in sys.stdin:
        try:
            # Produce line (without newline)
            p.produce(topic, line.rstrip(), callback=delivery_callback)

        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(p))

        # Serve delivery callback queue.
        # NOTE: Since produce() is an asynchronous API this poll() call
        #       will most likely not serve the delivery callback for the
        #       last produce()d message.
        p.poll(0)

    # Wait until all messages have been delivered
    sys.stderr.write('%% Waiting for %d deliveries\n' % len(p))
    p.flush()
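
# Usage sketch for the reconstructed fragment above (file name, broker address
# and topic are assumptions):
#   cat routes.txt | python stdin_producer.py localhost:9092 mytopic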
Esempio n. 41
0
def test_produce_headers():
    """ Test produce() with timestamp arg """
    p = Producer({
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
        'message.timeout.ms': 10
    })

    binval = pack('hhl', 1, 2, 3)

    headers_to_test = [
        [('headerkey', 'headervalue')],
        [('dupkey', 'dupvalue'), ('empty', ''), ('dupkey', 'dupvalue')],
        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
        [('key_with_null_value', None)],
        [('binaryval', binval)],
        [('alreadyutf8', u'Småland'.encode('utf-8'))],
        [('isunicode', 'Jämtland')],
        {
            'headerkey': 'headervalue'
        },
        {
            'dupkey': 'dupvalue',
            'empty': '',
            'dupkey': 'dupvalue'
        },  # noqa: F601
        {
            'dupkey': 'dupvalue',
            'dupkey': 'diffvalue'
        },  # noqa: F601
        {
            'key_with_null_value': None
        },
        {
            'binaryval': binval
        },
        {
            'alreadyutf8': u'Småland'.encode('utf-8')
        },
        {
            'isunicode': 'Jämtland'
        }
    ]

    for headers in headers_to_test:
        print('headers', type(headers), headers)
        p.produce('mytopic', value='somedata', key='a key', headers=headers)
        p.produce('mytopic', value='somedata', headers=headers)

    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=('a', 'b'))

    with pytest.raises(TypeError):
        p.produce('mytopic',
                  value='somedata',
                  key='a key',
                  headers=[('malformed_header')])

    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', headers={'anint': 1234})

    p.flush()
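
# Consumer-side sketch (an assumption, not part of the test above): headers
# produced as above come back from Message.headers() as a list of (key, value)
# tuples, with values returned as bytes (or None).
def print_message_headers(msg):
    for key, value in (msg.headers() or []):
        print(key, value)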
Esempio n. 42
0
def test_consumer_start_from_partition_start():
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, '{}'.format(i).encode('utf8'))

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group='consumer-{}'.format(uuid.uuid1().hex),
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in xrange(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert len(assignments_received
                   ) == 1, 'expected to receive partition assignment'
        assert set((i.topic, i.partition)
                   for i in assignments_received[0]) == set([(topic, 0)])

        # TODO: Make sure that all partitions remain paused.

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key='{}:{}:{}'.format(
                message.topic(),
                message.partition(),
                synchronize_commit_group,
            ).encode('utf8'),
            value='{}'.format(message.offset() + 1, ).encode('utf8'),
        )

        assert producer.flush(
            5) == 0, 'producer did not successfully flush queue'

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?
        for i in xrange(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, 'no message received'

        expected_message = messages_delivered[topic][0]
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
Esempio n. 43
0
class KafkaWorkflowResultsSender(object):
    def __init__(self, execution_db, message_converter=ProtobufWorkflowResultsConverter, socket_id=None):
        self._ready = False

        self.id_ = socket_id
        kafka_config = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.execution_db = execution_db
        self.topic = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_TOPIC
        self.message_converter = message_converter

        if self.check_status():
            self._ready = True

    def shutdown(self):
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def _format_topic(self, event):
        return '{}.{}'.format(self.topic, event.name)

    def handle_event(self, workflow, sender, **kwargs):
        """Listens for the data_sent callback, which signifies that an execution element needs to trigger a
                callback in the main thread.

            Args:
                workflow (Workflow): The Workflow object that triggered the event
                sender (ExecutionElement): The execution element that sent the signal.
                kwargs (dict): Any extra data to send.
        """
        event = kwargs['event']
        if event in [WalkoffEvent.TriggerActionAwaitingData, WalkoffEvent.WorkflowPaused]:
            saved_workflow = SavedWorkflow.from_workflow(workflow)
            self.execution_db.session.add(saved_workflow)
            self.execution_db.session.commit()
        elif event == WalkoffEvent.ConsoleLog:
            action = workflow.get_executing_action()
            sender = action

        if self.id_:
            packet_bytes = self.message_converter.event_to_protobuf(sender, workflow, **kwargs)
            self.producer.produce(self._format_topic(event), packet_bytes, callback=self._delivery_callback)
        else:
            event.send(sender, data=kwargs.get('data', None))

    def is_ready(self):
        return self._ready

    def check_status(self):
        if self.producer is not None:
            return True
        return False

    def send_ready_message(self):
        WalkoffEvent.CommonWorkflowSignal.send(sender={'id': '1'}, event=WalkoffEvent.WorkerReady)

    def create_workflow_request_message(self, workflow_id, workflow_execution_id, start=None, start_arguments=None,
                                        resume=False, environment_variables=None, user=None):
        return self.message_converter.create_workflow_request_message(workflow_id, workflow_execution_id, start,
                                                                      start_arguments, resume, environment_variables,
                                                                      user)
Esempio n. 44
0
from confluent_kafka import Producer

p = Producer({'bootstrap.servers': 'localhost:9092'})
p.produce('mytopic', value='world3')
p.flush(30)
'''
def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: {0}: {1}"
              .format(msg.value(), err.str()))
    else:
        print("Message produced: {0}".format(msg.value()))

p = Producer({'bootstrap.servers': 'localhost:9092'})

try:
    for val in xrange(1, 1000):
        p.produce('mytopic', 'myvalue #{0}'
                  .format(val), callback=acked)
        p.poll(0.5)

except KeyboardInterrupt:
    pass

p.flush(30)
'''
Esempio n. 45
0
def publish(request):
    elasticsearch_server = 'http://localhost:9200/clincoded'
    return_object = {'status': 'Fail',
                     'message': 'Unable to deliver message'}

    # Check that required parameters have been provided
    if not('type' in request.params and 'uuid' in request.params):
        return_object['message'] = 'Required parameters missing in request'
        return return_object

    # Attempt to retrieve data (from Elasticsearch)
    try:
        searchRes = requests.get('{}/{}/{}'.format(elasticsearch_server, request.params['type'], request.params['uuid']), timeout=10)

        if searchRes.status_code != requests.codes.ok:
            return_object['message'] = 'Data search failed'
            return return_object

    except Exception as e:
        return_object['message'] = 'Data search could not be completed'
        return return_object

    # Store JSON-encoded content of search result(s)
    try:
        resultJSON = searchRes.json()

    except Exception as e:
        return_object['message'] = 'Retrieved data not in expected format'
        return return_object

    # Check that search found data
    if 'found' not in resultJSON or not(resultJSON['found']):
        return_object['message'] = 'Requested data could not be found'
        return return_object

    # Check that data has expected elements
    try:
        data_type_to_publish = resultJSON['_source']['embedded']['resourceType']

        if data_type_to_publish == 'classification':
            evidence_to_publish = resultJSON['_source']['embedded']['resourceParent']['gdm']
            publishing_affiliation = resultJSON['_source']['embedded']['resource']['affiliation']
            evidence_counts_to_publish = resultJSON['_source']['embedded']['resource']['classificationPoints']

        elif data_type_to_publish == 'interpretation':
            evidence_to_publish = resultJSON['_source']['embedded']['resourceParent']['interpretation']

        else:
            raise Exception

    except Exception as e:
        return_object['message'] = 'Retrieved data missing expected elements'
        return return_object

    # Check that message should be sent? (approved status? permission to publish?)

    # Construct message
    try:
        if data_type_to_publish == 'interpretation':
            message_template = deepcopy(clincoded.messaging.templates.vci_to_dx.message_template)
            data_to_remove = clincoded.messaging.templates.vci_to_dx.data_to_remove
            add_data_to_msg_template(resultJSON['_source']['embedded'], None, None, message_template)

        else:
            message_template = deepcopy(clincoded.messaging.templates.gci_to_dx.message_template)
            classification_points = deepcopy(evidence_counts_to_publish)
            add_data_to_msg_template(resultJSON['_source']['embedded'], gather_evidence(evidence_to_publish, publishing_affiliation),
                gather_evidence_counts(classification_points, True), message_template)
            message = json.dumps(message_template, separators=(',', ':'))

    except Exception as e:
        return_object['message'] = 'Failed to build complete message'
        return return_object

    # Transform message (if necessary, via independent service)
    try:
        if data_type_to_publish == 'interpretation':
            remove_data_from_msg_template(data_to_remove, message_template['interpretation'])
            message_template['interpretation'] = transform_interpretation(message_template['interpretation'], request.host)
            message = json.dumps(message_template, separators=(',', ':'))

    except Exception as e:
        if e.args:
            return_object['message'] = e.args
        else:
            return_object['message'] = 'Failed to build complete message'
        return return_object

    # Configure message delivery parameters
    kafka_cert_pw = ''

    if 'KAFKA_CERT_PW' in os.environ:
        kafka_cert_pw = os.environ['KAFKA_CERT_PW']

    kafka_conf = {'bootstrap.servers': 'localhost:9093',
            'log_level': 0,
            'security.protocol': 'ssl',
            'ssl.key.location': 'etc/certs/client.key',
            'ssl.key.password': kafka_cert_pw,
            'ssl.certificate.location': 'etc/certs/client.crt',
            'ssl.ca.location': 'etc/certs/server.crt'}
    kafka_topic = 'test'
    kafka_timeout = 10

    if request.host != 'localhost:6543':
        kafka_conf = {'bootstrap.servers': 'exchange.clinicalgenome.org:9093',
            'log_level': 0,
            'security.protocol': 'ssl',
            'ssl.key.location': 'etc/certs/dataexchange/client.key',
            'ssl.key.password': kafka_cert_pw,
            'ssl.certificate.location': 'etc/certs/dataexchange/client.crt',
            'ssl.ca.location': 'etc/certs/dataexchange/server.crt'}

        if data_type_to_publish == 'interpretation':
            kafka_topic = 'variant_interpretation'
        else:
            kafka_topic = 'gene_validity'

        if request.host != 'curation.clinicalgenome.org':
            kafka_topic += '_dev'

    # Send message
    p = Producer(**kafka_conf)

    def delivery_callback(err, msg):
        nonlocal return_object
        if err:
            return_object['message'] = err

        else:
            return_object = {'status': 'Success',
                         'message': message,
                         'partition': msg.partition(),
                         'offset': msg.offset()}

    try:
        p.produce(kafka_topic, message, callback=delivery_callback)
        p.flush(kafka_timeout)
        return return_object

    except Exception as e:
        return_object['message'] = 'Message delivery failed'
        return return_object
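
# Sketch (an addition, not part of publish() above): the produce-then-flush
# pattern used there can be wrapped as a small helper that reports whether the
# message was delivered within the timeout.
def produce_and_wait(kafka_conf, kafka_topic, payload, timeout=10):
    result = {'status': 'Fail', 'message': 'Unable to deliver message'}

    def on_delivery(err, msg):
        if err:
            result['message'] = str(err)
        else:
            result.update({'status': 'Success',
                           'partition': msg.partition(),
                           'offset': msg.offset()})

    producer = Producer(kafka_conf)
    producer.produce(kafka_topic, payload, callback=on_delivery)
    producer.flush(timeout)
    return result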