def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)


def test_fatal():
    """ Test fatal exceptions """

    # No broker is configured; create a producer with an error callback and
    # trigger a test fatal error.
    p = Producer({'error_cb': error_cb})

    with pytest.raises(KafkaException) as exc:
        KafkaError._test_raise_fatal()
    err = exc.value.args[0]
    assert isinstance(err, KafkaError)
    assert err.fatal() is True

    p.poll(0)  # Need some p use to avoid flake8 unused warning


def test_error_cb():
    """ Test the error callback. """

    global seen_all_brokers_down

    # Configure an invalid broker and make sure the ALL_BROKERS_DOWN
    # error is seen in the error callback.
    p = Producer({'bootstrap.servers': '127.0.0.1:1', 'socket.timeout.ms': 10,
                  'error_cb': error_cb})

    t_end = time.time() + 5

    while not seen_all_brokers_down and time.time() < t_end:
        p.poll(1)

    assert seen_all_brokers_down
Example #4
def producer(args, sniff_timeout_ms=500, sniff_promisc=True):
    """ Captures packets from a network interface and sends them to a Kafka topic. """

    # setup the signal handler
    signal.signal(signal.SIGINT, signal_handler)

    global producer_args
    producer_args = args

    # connect to kafka
    logging.info("Connecting to Kafka; %s", args.kafka_configs)
    kafka_producer = Producer(args.kafka_configs)

    # initialize packet capture
    logging.info("Starting packet capture")
    capture = pcapy.open_live(args.interface, args.snaplen, sniff_promisc, sniff_timeout_ms)
    pkts_in = 0

    try:
        while not finished.is_set() and (args.max_packets <= 0 or pkts_in < args.max_packets):

            # capture a packet
            (pkt_hdr, pkt_raw) = capture.next()
            if pkt_hdr is not None:
                logging.debug("Packet received: pkts_in=%d, pkt_len=%s", pkts_in, pkt_hdr.getlen())
                pkts_in += 1
                pkt_ts = timestamp(pkt_hdr)
                kafka_producer.produce(args.kafka_topic, key=pack_ts(pkt_ts), value=pkt_raw, callback=delivery_callback)

                # pretty print, if needed
                if args.pretty_print > 0 and pkts_in % args.pretty_print == 0:
                    print('Packet received[%s]' % pkts_in)

            # serve the callback queue
            kafka_producer.poll(0)

    finally:
        # flush all messages
        logging.info("Waiting for '%d' message(s) to flush", len(kafka_producer))
        kafka_producer.flush()

        # pkts_out may not be initialized if the callback was never executed
        pkts_out = 0
        if hasattr(delivery_callback, "pkts_out"):
            pkts_out = delivery_callback.pkts_out

        logging.info("'%d' packet(s) in, '%d' packet(s) out", pkts_in, pkts_out)
Example #5
    {
      "id": "%s",
      "type":"alarm",
      "name": "Tap Unreachable Alarm",
      "source": "%s",
      "reason":"Heartbeat failed. TAP or Network is down",
      "time":"%s",
      "time-zone":"GMT+5:30",
      "severity":"%s",
      "suppress": false,
      "message": "TAP with IP 10.1.23.4 and with id: %s is not reachable"
    }
    """

    while True:
        source = choice(SOURCES)
        t = datetime.now().isoformat()
        global SEQ
        yield (t, template % (SEQ, source, t, choice(SEVERITY), source))
        SEQ += 1
        sleep(interval)


if __name__ == '__main__':
    for key, data in datagen():
        p.poll(0)
        p.produce(topic,
                  data.encode('utf-8'),
                  callback=delivery_report,
                  key=key)
Example #6
def simulate_cmd(cmd_topic):
    """

    CMD_TOPICS = ["HistoryAlarmCmd",  # 历史报警信息查询
                 "History300CRAlarmCmd",  # 300CR历史报警信息查询
                 "HistoryDigitalChangeCmd",  # 开关量变化信息查询
                 "HistoryCmd",  # 历史数据查询
                 "TestCmd"]  # 试验信息查询

    :param q1:
    :return:
    """
    broker = settings.BROKER
    topics = settings.CMD_TOPICS
    # Producer configuration
    conf = {'bootstrap.servers': broker}

    # Create a Producer instance
    p = Producer(**conf)

    if cmd_topic == "HistoryAlarmCmd":
        cmd = {"SourceName": "300MT",
               "StartTime": 1537668000000,  # 2018/9/23 10:00:00
               "EndTime": 1537668000000}  # 2018/9/23 10:00:00
        # cmd = {"SourceName": "All",
        #        "StartTime": 1537668000000,  # 2018/9/23 10:00:00
        #        "EndTime": 1537668000000}  # 2018/9/23 10:00:00
        j_cmd = json.dumps(cmd)
        p.produce(cmd_topic, j_cmd)  # send to Kafka

    if cmd_topic == "History300CRAlarmCmd":
        cmd = {"SourceName": "300MT",
               "StartTime": 1537668000000,  # 2018/9/23 10:00:00
               "EndTime": 1537668000000}  # 2018/9/23 10:00:00
        # cmd = {"SourceName": "All",
        #        "StartTime": 1537668000000,  # 2018/9/23 10:00:00
        #        "EndTime": 1537668000000}  # 2018/9/23 10:00:00
        j_cmd = json.dumps(cmd)
        p.produce(cmd_topic, j_cmd)  # send to Kafka

    if cmd_topic == "HistoryDigitalChangeCmd":
        cmd = {"SourceName": "905XR",
               "StartTime": 1537668000000,  # 2018/9/23 10:00:00
               "EndTime": 1537668000000}  # 2018/9/23 10:00:00
        # cmd = {"SourceName": "All",
        #        "StartTime": 1537668000000,  # 2018/9/23 10:00:00
        #        "EndTime": 1537668000000}  # 2018/9/23 10:00:00
        j_cmd = json.dumps(cmd)
        p.produce(cmd_topic, j_cmd)  # send to Kafka

    if cmd_topic == "HistoryCmd":
        # single time range
        cmd = {"SourceName": ["905XR", "906XR", "907XR"],
               "Time": [{"StartTime": 1537668000000, "EndTime": 1537668000000}]
               }
        # two time ranges
        # cmd = {"SourceName": ["905XR", "906XR", "907XR"],
        #        "Time": [{"StartTime": 1537668000000, "EndTime": 1537668000000},
        #                 {"StartTime": 1537668000000, "EndTime": 1537668000000}]
        #        }
        j_cmd = json.dumps(cmd)
        p.produce(cmd_topic, j_cmd)  # send to Kafka

    if cmd_topic == "TestCmd":
        # single time range
        cmd = {"TestCmd": "TestInfo"}

        j_cmd = json.dumps(cmd)
        p.produce(cmd_topic, j_cmd)  # send to Kafka

    p.poll(0)
    p.flush()
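
# Illustrative call (topic name taken from CMD_TOPICS above; assumes `settings` is configured):
#   simulate_cmd("TestCmd")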
Example #7
        """Delivery report handler called on
        successful or failed delivery of message
        """
        if err is not None:
            print("Failed to deliver message: {}".format(err))
        else:
            delivered_records += 1
            print("Produced record to topic {} partition [{}] @ offset {}".
                  format(msg.topic(), msg.partition(), msg.offset()))

    with open(os.path.join(DATA_DIR, 'bcsample.json')) as data_file:
        data = json.load(data_file)

    for n in range(1000):
        record_key = "key-{}".format(n)
        record_value = json.dumps(data[n])
        print("Producing record: {}\t{}".format(record_key, record_value))
        producer.produce(topic,
                         key=record_key,
                         value=record_value,
                         on_delivery=acked)
        time.sleep(0.25)
        # p.poll() serves delivery reports (on_delivery)
        # from previous produce() calls.
        producer.poll(0)

    producer.flush()

    print("{} messages were produced to topic {}!".format(
        delivered_records, topic))
Example #8
    print("Total data:")
    print(len(data))
    print("---------------------------------")
    i = 0
    for _ in data:
        i += 1
        if i % 10 == 0:
            print(i)

        record_key = 'stop'
        record_value = json.dumps(_)

        logging.info(
            "Producing record: {}\t{}".format(
                record_key, record_value))
        producer.produce(topic,
                         key=record_key,
                         value=record_value,
                         on_delivery=acked
                         )
        # producer.poll() serves delivery reports (on_delivery)
        # from previous produce() calls.
        producer.poll(0)

    producer.flush()

    print(
        "{} messages were produced to topic {}!".format(
            delivered_records,
            topic))


def producer_function():
    p = Producer({'bootstrap.servers': sys.argv[1]})

    my_input = None

    # produce until the client asks to exit
    while my_input != 'exit':
        global topic
        print()
        print("**Enter your Message for {}**".format(topic))
        my_input = input()
        if my_input == 'exit':
            p.poll(0)
            break

        # inform the server about the logging out activity
        elif my_input == "logout":
            print ("Attempting to Logout..")
            my_input =my_input+ "-" + sys.argv[2]
            p.produce(topic, my_input.encode('utf-8'))
            time.sleep(5)
            print("Logged Out!")

        # retrieve the chat history of the client with another client
        elif my_input.find('read') != -1:
            print("Enter the client whose messages you want to read: ")
            client = input()
            print("Retrieving Messages..")
            time.sleep(5)
            client += ".txt"
            try:
                with open(client, "r") as file:
                    print(file.read())
            except FileNotFoundError:
                print("OOPS! You haven't chatted with the requested client yet.")

        # ask the server for active topics and connect to either of them or server itself
        elif my_input.find('list existing topics') != -1:
            my_input = my_input + " " + sys.argv[2]
            p.produce('server', my_input.encode('utf-8'))
            print("Please Wait... Processing your Request!")
            print("Existing Topics: ")
            time.sleep(5)
            my_input = input()
            topic = my_input
            print("You chose to connect to: {}".format(topic))
            contact_list.add(my_input)

        # connect to the server from any stage
        elif my_input == 'connect to server':
            print("Attempting to Connect..")
            time.sleep(5)
            topic = 'server'
            print("Connection Successful!")

        # retrieve the list of contacts
        elif my_input == 'contact list':
            temp_topic = topic
            topic = sys.argv[2]
            for contact in contact_list:
                print(contact)
            topic = temp_topic

        # else send a message to the client/server you want to chat to
        else:
            my_input = my_input + "-" + sys.argv[2]
            p.produce(topic, my_input.encode('utf-8'), callback=delivery_report)


def main():
    # kafka
    consumer = Consumer(collectd_cfg['consumer'])
    consumer.subscribe([collectd_cfg['raw_data_topic']])
    producer = Producer(collectd_cfg['producer'])
    # Trigger any available delivery report callbacks from previous produce() calls
    # see: https://github.com/confluentinc/confluent-kafka-python/issues/16
    producer.poll(0)

    # influxdb
    influxdb_client = InfluxDBClient(host=influxdb_cfg['server'],
                                     database=influxdb_cfg['database'])
    influxdb_client.create_database(influxdb_cfg['database'])
    influxdb_client.create_retention_policy(name="infinite",
                                            duration='INF',
                                            replication=1,
                                            database=influxdb_cfg['database'],
                                            default=True)

    influxdb_data_points = []

    influxdb_to_send = threading.Event()
    influxdb_to_stop = threading.Event()
    influxdb_to_stop.clear()
    influxdb_to_send.clear()

    def set_influxdb_to_send():
        while not influxdb_to_stop.is_set():
            sleep(0.5)
            influxdb_to_send.set()

    influxdb_flag_thread = threading.Thread(target=set_influxdb_to_send)
    influxdb_flag_thread.start()

    logger.info("Start processing collectd data ...")

    try:
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            if msg.error():
                logger.error("Consumer error: {}".format(msg.error()))
                continue

            measurements = extract(msg)

            # Send extracted data to kafka topics
            # Asynchronously produce a message, the delivery report callback
            # will be triggered from poll() above, or flush() below, when the message has
            # been successfully delivered or failed permanently.
            for item in measurements:
                producer.produce(topic='collectd',
                                 value=str({item[0]: item[1]}),
                                 timestamp=item[2],
                                 callback=delivery_report)
                producer.poll(0)

            # Send extracted data to influxdb, but batching (only send every 1s)
            for item in measurements:
                influxdb_data_points.append({
                    "measurement": item[0],
                    # timestamp from ms in collectd to ns in influxdb
                    "time": int(item[2]) * 10**6,
                    "fields": {
                        "value": item[1],
                    }
                })
            if influxdb_to_send.is_set():
                influxdb_client.write_points(influxdb_data_points)
                influxdb_to_send.clear()
                influxdb_data_points = []

    except KeyboardInterrupt:
        # Wait for any outstanding messages to be delivered and delivery report
        # callbacks to be triggered.
        producer.flush()
        consumer.close()
        influxdb_to_stop.set()
Example #11
def test_cluster(bootstrap_ip, bootstrap_port):
    p = Producer({'bootstrap.servers': f'{bootstrap_ip}:{bootstrap_port}'})
    p.produce('cluster-test', 'Cluster-Test', callback=test_ok)
    p.poll(5)
Example #12
class KafkaSink(SandcrawlerWorker):
    def __init__(self, kafka_hosts: str, produce_topic: str, **kwargs):
        self.sink = None
        self.counts = Counter()
        self.produce_topic = produce_topic
        self.kafka_hosts = kafka_hosts

        config = self.producer_config({
            "bootstrap.servers": kafka_hosts,
            "message.max.bytes": 30000000,  # ~30 MBytes; broker is ~50 MBytes
            "api.version.request": True,
            "api.version.fallback.ms": 0,
        })
        self.producer = Producer(config)

    @staticmethod
    def _fail_fast(err: Any, msg: Any) -> None:
        if err is not None:
            print("Kafka producer delivery error: {}".format(err),
                  file=sys.stderr)
            print("Bailing out...", file=sys.stderr)
            # TODO: should it be sys.exit(-1)?
            raise KafkaException(err)

    def producer_config(self, kafka_config: dict) -> dict:
        config = kafka_config.copy()
        config.update({
            "delivery.report.only.error": True,
            "default.topic.config": {
                "message.timeout.ms": 30000,
                "request.required.acks": -1,  # all brokers must confirm
            },
        })
        return config

    def push_record(self, msg: Any, key: Optional[str] = None) -> Any:
        self.counts["total"] += 1
        if type(msg) == dict:
            if not key and "key" in msg:
                key = msg["key"]
            msg = json.dumps(msg)
        if type(msg) == str:
            msg = msg.encode("utf-8")
        assert type(msg) == bytes

        self.producer.produce(self.produce_topic,
                              msg,
                              key=key,
                              on_delivery=self._fail_fast)
        self.counts["produced"] += 1

        # check for errors etc
        self.producer.poll(0)

    def push_batch(self, msgs: List[Any]) -> List[Any]:
        for m in msgs:
            self.push_record(m)
        return []

    def finish(self) -> Counter:
        self.producer.flush()
        return self.counts
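
# Illustrative usage of KafkaSink (a sketch only; the broker address, topic name and
# record fields below are assumptions, not taken from the class above):
#
#   sink = KafkaSink(kafka_hosts="localhost:9092", produce_topic="sandcrawler-output")
#   sink.push_record({"key": "doc-1", "status": "success"})
#   counts = sink.finish()   # flushes the producer and returns the Counter of records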
Example #13
class CompetitionProducer:
    """

    """
    daemon = True
    producer = None

    def __init__(self, server):
        conf = {'bootstrap.servers': server}
        self.producer = Producer(conf)  # Create producer

    # message must be in byte format
    def send(self, topic, message):
        self.producer.produce(topic, message)  # Sending messages to a certain topic
        self.producer.poll(timeout=0)

    def main(self, topic, initial_batch, items, predictions, initial_training_time, batch_size, time_interval,
             predictions_time_interval, spark_topic, competition_id):

        """
        Recreates the stream. Sends the data in batches: first test (without the target value) and then train batches.
        All batches are sent according to the time intervals set for the current competition.

        :param topic:
        :param initial_batch:
        :param items:
        :param predictions:
        :param initial_training_time:
        :param batch_size:
        :param time_interval:
        :param predictions_time_interval:
        :param spark_topic:
        :param competition_id:
        :return:
        """

        for item in initial_batch:
            try:
                # Send row by row from initial batch as json
                self.send(topic, orjson.dumps(item))
            except Exception as e:
                # Sending may fail (e.g. the topic may not exist yet); just log the error
                print(e)

        # After sending initial batch, sleep for initial training time
        time.sleep(int(initial_training_time))
        # Creating lists of batch size, one for test items with just values and second with predictions for training
        test_groups = list(self.chunker(items, batch_size))
        train_groups = list(self.chunker(predictions, batch_size))

        i = -1

        # Accessing each group in the list test_groups
        for group in test_groups:
            # In parallel accessing the predictions
            # Adding tag, deadline and released at to every item in train group / prediction
            released_at = datetime.datetime.now()
            # for item in test group add tag, deadline and released
            for item in group:
                item['tag'] = 'TEST'
                item['Deadline'] = str(released_at + datetime.timedelta(seconds=int(predictions_time_interval)))
                item['Released'] = str(released_at)
                item['competition_id'] = str(competition_id)
                # Sending testing items
                try:
                    self.send(topic, orjson.dumps(item))

                except Exception as e:
                    print(e)

            i = i + 1
            train_group = train_groups[i]
            for item in train_group:
                deadline = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                item['Deadline'] = deadline.strftime("%Y-%m-%d %H:%M:%S")
                item['Released'] = released_at.strftime("%Y-%m-%d %H:%M:%S")
                item['competition_id'] = competition_id
                try:
                    self.send(spark_topic, orjson.dumps(item))
                except Exception as e:
                    print(e)

            time.sleep(time_interval)

            for item in train_group:
                item['tag'] = 'TRAIN'
                item['Deadline'] = released_at + datetime.timedelta(seconds=int(predictions_time_interval))
                item['Released'] = released_at
                try:
                    self.send(topic, orjson.dumps(item, default=json_util.default))
                except Exception as e:
                    print(e)

        time.sleep(time_interval)

        self.producer.flush()

    @staticmethod
    def chunker(seq, size):
        """ Returns data in chunks (batches) of a given size. """
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))
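        # For example (illustrative values):
        #   list(CompetitionProducer.chunker([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]
        # The last chunk may be smaller than the requested size.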

    @staticmethod
    def is_not_empty(row):
        """Check if row is empty."""
        return all(item == "" for item in row)


    def create_competition(self, competition, items, predictions, initial_batch):
        """Create a competition and start releasing the data stream."""
        self.main(
            topic=competition.name.lower().replace(" ", ""),
            initial_training_time=competition.initial_training_time,
            initial_batch=initial_batch,
            items=items,
            predictions=predictions,
            batch_size=competition.batch_size,
            time_interval=competition.time_interval,
            predictions_time_interval=competition.predictions_time_interval,
            spark_topic=competition.name.lower().replace(" ", "") + 'spark_train',
            competition_id=competition.competition_id)
Example #14
class KafkaStreamingClient(AbstractStreamingClient):
    """Kafka streaming client."""
    def __init__(self, config):  # pragma: no cover
        """
        Streaming client implementation based on Kafka.

        Configuration keys:
          KAFKA_ADDRESS
          KAFKA_CONSUMER_GROUP
          KAFKA_TOPIC
          TIMEOUT
          EVENTHUB_KAFKA_CONNECTION_STRING
        """
        self.logger = Logger()

        self.topic = config.get("KAFKA_TOPIC")
        if config.get("TIMEOUT"):
            try:
                self.timeout = int(config.get("TIMEOUT"))
            except ValueError:
                self.timeout = None
        else:
            self.timeout = None

        kafka_config = self.create_kafka_config(config)
        self.admin = admin.AdminClient(kafka_config)

        if config.get("KAFKA_CONSUMER_GROUP") is None:
            self.logger.info('Creating Producer')
            self.producer = Producer(kafka_config)
        else:
            self.logger.info('Creating Consumer')
            self.consumer = Consumer(kafka_config)

    @staticmethod
    def create_kafka_config(user_config: dict) -> dict:  # pragma: no cover
        """Create the kafka configuration."""
        config = {
            "bootstrap.servers": user_config.get("KAFKA_ADDRESS"),
            "enable.auto.commit": False,
            "auto.offset.reset": "earliest",
            "default.topic.config": {
                'auto.offset.reset': 'smallest'
            },
        }

        if user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'):
            ssl_location = user_config.get(
                'SSL_CERT_LOCATION') or '/etc/ssl/certs/ca-certificates.crt'
            eventhub_config = {
                'security.protocol':
                "SASL_SSL",
                'sasl.mechanism':
                "PLAIN",
                'ssl.ca.location':
                ssl_location,
                'sasl.username':
                '******',
                'sasl.password':
                user_config.get('EVENTHUB_KAFKA_CONNECTION_STRING'),
                'client.id':
                'agogosml',
            }
            config = {**config, **eventhub_config}

        if user_config.get('KAFKA_CONSUMER_GROUP') is not None:
            config['group.id'] = user_config['KAFKA_CONSUMER_GROUP']

        if user_config.get('KAFKA_DEBUG') is not None:
            config['debug'] = user_config['KAFKA_DEBUG']

        return config

    def delivery_report(self, err, msg):  # pragma: no cover
        """
        Indicate delivery result.

        Called once for each message produced. Triggered by poll() or flush().

        :param err: An error message.
        :param msg: A string input to be uploaded to kafka.
        """
        if err is not None:
            self.logger.error('Message delivery failed: %s', err)
        else:
            self.logger.info('Message delivered to %s [%s]', msg.topic(),
                             msg.partition())

    def send(self, message: str):  # pragma: no cover
        if not isinstance(message, str):
            raise TypeError('str type expected for message')
        try:
            mutated_message = message.encode('utf-8')
            self.logger.info('Sending message to kafka topic: %s', self.topic)
            self.producer.poll(0)
            self.producer.produce(self.topic,
                                  mutated_message,
                                  callback=self.delivery_report)
            self.producer.flush()
            return True
        except Exception as ex:
            self.logger.error('Error sending message to kafka: %s', ex)
            return False

    def stop(self):  # pragma: no cover
        pass

    def check_timeout(self, start: datetime):  # pragma: no cover
        """Interrupts if too much time has elapsed since the kafka client started running."""
        if self.timeout is not None:
            elapsed = datetime.now() - start
            if elapsed.seconds >= self.timeout:
                raise KeyboardInterrupt

    def handle_kafka_error(self, msg):  # pragma: no cover
        """Handle an error in kafka."""
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            self.logger.info('%% %s [%d] reached end at offset %d\n',
                             msg.topic(), msg.partition(), msg.offset())
        else:
            # Error
            raise KafkaException(msg.error())

    def start_receiving(self,
                        on_message_received_callback):  # pragma: no cover
        try:
            self.subscribe_to_topic()
            start = datetime.now()

            while True:
                # Stop loop after timeout if exists
                self.check_timeout(start)

                # Poll messages from topic
                msg = self.read_single_message()
                if msg is not None:
                    on_message_received_callback(msg)

        except KeyboardInterrupt:
            self.logger.info('Aborting listener...')

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()

    def subscribe_to_topic(self):  # pragma: no cover
        """Subscribe to topic."""
        self.consumer.subscribe([self.topic])

    def read_single_message(self):  # pragma: no cover
        """Poll messages from topic."""
        msg = self.consumer.poll(0.000001)

        if msg is None:
            return None

        if msg.error():
            # Error or event
            self.handle_kafka_error(msg)
            return None

        # Proper message
        # self.logger.info('kafka read message: %s, from topic: %s', msg.value(), msg.topic())
        self.consumer.commit(msg)
        return msg.value()
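
# Illustrative usage of KafkaStreamingClient (a sketch only; the broker address and
# topic name are assumptions):
#
#   sender = KafkaStreamingClient({"KAFKA_ADDRESS": "localhost:9092",
#                                  "KAFKA_TOPIC": "events"})
#   sender.send('{"hello": "world"}')              # producer path (no consumer group set)
#
#   listener = KafkaStreamingClient({"KAFKA_ADDRESS": "localhost:9092",
#                                    "KAFKA_TOPIC": "events",
#                                    "KAFKA_CONSUMER_GROUP": "demo-group"})
#   listener.start_receiving(print)                # consumer path; blocks until timeout/interrupt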
Example #15
    def test_ingester(self):

        init_db_sync(config=config, verbose=True)

        log("Setting up paths")
        # path_kafka = pathlib.Path(config["path"]["kafka"])

        path_logs = pathlib.Path(config["path"]["logs"])
        if not path_logs.exists():
            path_logs.mkdir(parents=True, exist_ok=True)

        if config["misc"]["broker"]:
            log("Setting up test groups and filters in Fritz")
            program = Program(group_name="FRITZ_TEST", group_nickname="test")
            Filter(
                collection="ZTF_alerts",
                group_id=program.group_id,
                filter_id=program.filter_id,
            )

            program2 = Program(group_name="FRITZ_TEST_AUTOSAVE", group_nickname="test2")
            Filter(
                collection="ZTF_alerts",
                group_id=program2.group_id,
                filter_id=program2.filter_id,
                autosave=True,
                pipeline=[{"$match": {"objectId": "ZTF20aaelulu"}}],
            )

            program3 = Program(
                group_name="FRITZ_TEST_UPDATE_ANNOTATIONS", group_nickname="test3"
            )
            Filter(
                collection="ZTF_alerts",
                group_id=program3.group_id,
                filter_id=program3.filter_id,
                update_annotations=True,
                pipeline=[
                    {"$match": {"objectId": "ZTF20aapcmur"}}
                ],  # there are 3 alerts in the test set for this oid
            )

        # clean up old Kafka logs
        log("Cleaning up Kafka logs")
        subprocess.run(["rm", "-rf", path_logs / "kafka-logs", "/tmp/zookeeper"])

        log("Starting up ZooKeeper at localhost:2181")

        # start ZooKeeper in the background
        cmd_zookeeper = [
            os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-start.sh"),
            "-daemon",
            os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
        ]

        with open(path_logs / "zookeeper.stdout", "w") as stdout_zookeeper:
            # p_zookeeper =
            subprocess.run(
                cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
            )

        # take a nap while it fires up
        time.sleep(3)

        log("Starting up Kafka Server at localhost:9092")

        # start the Kafka server:
        cmd_kafka_server = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-server-start.sh"),
            "-daemon",
            os.path.join(config["path"]["kafka"], "config", "server.properties"),
        ]

        with open(
            os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
        ) as stdout_kafka_server:
            # p_kafka_server = subprocess.Popen(cmd_kafka_server, stdout=stdout_kafka_server, stderr=subprocess.STDOUT)
            # p_kafka_server =
            subprocess.run(cmd_kafka_server)

        # take a nap while it fires up
        time.sleep(3)

        # get kafka topic names with kafka-topics command
        cmd_topics = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
            "--zookeeper",
            config["kafka"]["zookeeper.test"],
            "-list",
        ]

        topics = (
            subprocess.run(cmd_topics, stdout=subprocess.PIPE)
            .stdout.decode("utf-8")
            .split("\n")[:-1]
        )
        log(f"Found topics: {topics}")

        # create a test ZTF topic for the current UTC date
        date = datetime.datetime.utcnow().strftime("%Y%m%d")
        topic_name = f"ztf_{date}_programid1_test"

        if topic_name in topics:
            # topic previously created? remove first
            cmd_remove_topic = [
                os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
                "--zookeeper",
                config["kafka"]["zookeeper.test"],
                "--delete",
                "--topic",
                topic_name,
            ]
            # print(kafka_cmd)
            remove_topic = (
                subprocess.run(cmd_remove_topic, stdout=subprocess.PIPE)
                .stdout.decode("utf-8")
                .split("\n")[:-1]
            )
            log(f"{remove_topic}")
            log(f"Removed topic: {topic_name}")
            time.sleep(1)

        if topic_name not in topics:
            log(f"Creating topic {topic_name}")

            cmd_create_topic = [
                os.path.join(config["path"]["kafka"], "bin", "kafka-topics.sh"),
                "--create",
                "--bootstrap-server",
                config["kafka"]["bootstrap.test.servers"],
                "--replication-factor",
                "1",
                "--partitions",
                "1",
                "--topic",
                topic_name,
            ]
            with open(
                os.path.join(config["path"]["logs"], "create_topic.stdout"), "w"
            ) as stdout_create_topic:
                # p_create_topic = \
                subprocess.run(
                    cmd_create_topic,
                    stdout=stdout_create_topic,
                    stderr=subprocess.STDOUT,
                )

        log("Starting up Kafka Producer")

        # spin up Kafka producer
        producer = Producer(
            {"bootstrap.servers": config["kafka"]["bootstrap.test.servers"]}
        )

        # small number of alerts that come with kowalski
        path_alerts = pathlib.Path("/app/data/ztf_alerts/20200202/")
        # grab some more alerts from gs://ztf-fritz/sample-public-alerts
        try:
            log("Grabbing more alerts from gs://ztf-fritz/sample-public-alerts")
            r = requests.get("https://www.googleapis.com/storage/v1/b/ztf-fritz/o")
            aa = r.json()["items"]
            ids = [pathlib.Path(a["id"]).parent for a in aa if "avro" in a["id"]]
        except Exception as e:
            log(
                "Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed, but it is ok"
            )
            log(f"{e}")
            ids = []
        subprocess.run(
            [
                "gsutil",
                "-m",
                "cp",
                "-n",
                "gs://ztf-fritz/sample-public-alerts/*.avro",
                "/app/data/ztf_alerts/20200202/",
            ]
        )
        log(f"Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts")
        # push!
        for p in path_alerts.glob("*.avro"):
            with open(str(p), "rb") as data:
                # Trigger any available delivery report callbacks from previous produce() calls
                producer.poll(0)

                log(f"Pushing {p}")

                # Asynchronously produce a message, the delivery report callback
                # will be triggered from poll() above, or flush() below, when the message has
                # been successfully delivered or failed permanently.
                producer.produce(topic_name, data.read(), callback=delivery_report)

                # Wait for any outstanding messages to be delivered and delivery report
                # callbacks to be triggered.
        producer.flush()

        log("Starting up Ingester")

        # digest and ingest
        watchdog(obs_date=date, test=True)
        log("Digested and ingested: all done!")

        # shut down Kafka server and ZooKeeper
        time.sleep(20)

        log("Shutting down Kafka Server at localhost:9092")
        # start the Kafka server:
        cmd_kafka_server_stop = [
            os.path.join(config["path"]["kafka"], "bin", "kafka-server-stop.sh"),
            os.path.join(config["path"]["kafka"], "config", "server.properties"),
        ]

        with open(
            os.path.join(config["path"]["logs"], "kafka_server.stdout"), "w"
        ) as stdout_kafka_server:
            # p_kafka_server_stop = \
            subprocess.run(
                cmd_kafka_server_stop,
                stdout=stdout_kafka_server,
                stderr=subprocess.STDOUT,
            )

        log("Shutting down ZooKeeper at localhost:2181")
        cmd_zookeeper_stop = [
            os.path.join(config["path"]["kafka"], "bin", "zookeeper-server-stop.sh"),
            os.path.join(config["path"]["kafka"], "config", "zookeeper.properties"),
        ]

        with open(
            os.path.join(config["path"]["logs"], "zookeeper.stdout"), "w"
        ) as stdout_zookeeper:
            # p_zookeeper_stop = \
            subprocess.run(
                cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT
            )

        log("Checking the ZTF alert collection states")
        mongo = Mongo(
            host=config["database"]["host"],
            port=config["database"]["port"],
            replica_set=config["database"]["replica_set"],
            username=config["database"]["username"],
            password=config["database"]["password"],
            db=config["database"]["db"],
            verbose=True,
        )
        collection_alerts = config["database"]["collections"]["alerts_ztf"]
        collection_alerts_aux = config["database"]["collections"]["alerts_ztf_aux"]
        n_alerts = mongo.db[collection_alerts].count_documents({})
        assert n_alerts == 313
        n_alerts_aux = mongo.db[collection_alerts_aux].count_documents({})
        assert n_alerts_aux == 145

        if config["misc"]["broker"]:
            log("Checking that posting to SkyPortal succeeded")

            # check number of candidates that passed the first filter
            resp = requests.get(
                program.base_url + f"/api/candidates?groupIDs={program.group_id}",
                headers=program.headers,
                timeout=3,
            )

            assert resp.status_code == requests.codes.ok
            result = resp.json()
            assert result["status"] == "success"
            assert "data" in result
            assert "totalMatches" in result["data"]
            assert result["data"]["totalMatches"] == 88

            # check that the only candidate that passed the second filter (ZTF20aaelulu) got saved as Source
            resp = requests.get(
                program2.base_url + f"/api/sources?group_ids={program2.group_id}",
                headers=program2.headers,
                timeout=3,
            )

            assert resp.status_code == requests.codes.ok
            result = resp.json()
            assert result["status"] == "success"
            assert "data" in result
            assert "totalMatches" in result["data"]
            assert result["data"]["totalMatches"] == 1
            assert "sources" in result["data"]
            assert result["data"]["sources"][0]["id"] == "ZTF20aaelulu"


def producer_trigger(raw_data, context):
    state_stats_url = ('https://api.covid19india.org/data.json')
    district_stats_url = ('https://api.covid19india.org/v2/state_district_wise.json')
    bootstrap_servers = "localhost:9092"
    kafka_district_data_topic_name = "district-data"
    kafka_processed_data_topic_name = "processed-data"
   

    conf = {'bootstrap.servers': bootstrap_servers}

    producer = Producer(conf, logger=logger)

    # import raw district data
    district_data = requests.get(district_stats_url).json()
    for data in district_data:
        state = data['state']
        district_data = data['districtData']
        for dd in district_data:
            district = dd['district']
            key = dict({'state': state, 'district': district})
            value = dict({'state': state, 'district': district, 'active': dd['active'], 'confirmed': dd['confirmed'],
                          'recovered': dd['recovered'], 'deceased': dd['deceased'],
                          'deltaConfirmed': dd['delta']['confirmed'],
                          'deltaRecovered': dd['delta']['recovered'], 'deltaDeceased': dd['delta']['deceased'],
                          'notes': dd['notes']
                          })
            try:
                producer.produce(topic=kafka_district_data_topic_name, value=json.dumps(value), key=json.dumps(key),
                                 on_delivery=fail)
            except BufferError:
                logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                             len(producer))
            producer.poll(0)
    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()

    district_data = requests.get(district_stats_url).json()
    for data in district_data:
        state = data['state']
        district_data = data['districtData']
        finalDict = {}
        for dd in district_data:
            district = dd['district']
            key = dict({'state': state, 'district': district})
            if dd['active'] < 200:
                finalDict.update({'low_risk_zone': dict(
                    {'state': state, 'district': district, 'active': dd['active'],
                     'confirmed': dd['confirmed'], 'recovered': dd['recovered'],
                     'deceased': dd['deceased'],
                     'deltaConfirmed': dd['delta']['confirmed'],
                     'deltaRecovered': dd['delta']['recovered'],
                     'deltaDeceased': dd['delta']['deceased'],
                     'notes': dd['notes']})})
            elif 200 < dd['active'] < 800:
                finalDict.update({'moderate_risk_zone': dict(
                    {'state': state, 'district': district, 'active': dd['active'],
                     'confirmed': dd['confirmed'], 'recovered': dd['recovered'],
                     'deceased': dd['deceased'],
                     'deltaConfirmed': dd['delta']['confirmed'],
                     'deltaRecovered': dd['delta']['recovered'],
                     'deltaDeceased': dd['delta']['deceased'],
                     'notes': dd['notes']})})
            elif dd['active'] > 800:
                finalDict.update({'high_risk_zone': dict(
                    {'state': state, 'district': district, 'active': dd['active'],
                     'confirmed': dd['confirmed'], 'recovered': dd['recovered'],
                     'deceased': dd['deceased'],
                     'deltaConfirmed': dd['delta']['confirmed'],
                     'deltaRecovered': dd['delta']['recovered'],
                     'deltaDeceased': dd['delta']['deceased'],
                     'notes': dd['notes']})})
            try:
                producer.produce(topic=kafka_processed_data_topic_name, value=json.dumps(finalDict), key=json.dumps(key),
                                 on_delivery=fail)
            except BufferError:
                logger.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                             len(producer))
            producer.poll(0)
    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()
Example #17
class SimPortClass:
    ''' Our class to handle an ODC Port. We must have __init__, ProcessJSONConfig, Enable, Disable, EventHandler, TimerHandler and
    RestRequestHandler defined, as they will be called by our C/C++ code.
    ODC publishes some functions to this module (when run); they are part of the odc module (include).
    We currently have odc.log, odc.SetTimer and odc.PublishEvent.
    '''

    # Worker methods. They need to appear early in the file so they are available to the code below; Python has no forward declarations.
    def LogTrace(self, message):
        odc.log(self.guid, Trace, message)

    def LogError(self, message):
        odc.log(self.guid, Error, message)

    def LogDebug(self, message):
        odc.log(self.guid, Debug, message)

    def LogInfo(self, message):
        odc.log(self.guid, Info, message)

    def LogWarn(self, message):
        odc.log(self.guid, Warn, message)

    def LogCritical(self, message):
        odc.log(self.guid, Critical, message)

    # Mandatory Methods that are called by ODC PyPort

    def __init__(self, odcportguid, objectname):
        self.objectname = objectname  # Documentation/error use only.
        self.guid = odcportguid  # So that when we call an odc method, ODC can work out which pyport to hand it too.
        self.Enabled = False
        self.MessageIndex = 0
        self.EventQueueSize = 0
        self.QueueErrorState = 0  # 0 - No Error, 1 - Error, Notified,
        self.SendErrorState = 0  # As above
        self.LastMessageIndex = self.MessageIndex
        self.StartTimeSeconds = time.time()
        self.timestart = 1.1  # Used for profiling, setup as a float
        self.measuretimeus = 0
        self.measuretimeus2 = 0
        self.ConfigDict = {}  # Config Dictionary
        self.LogInfo(
            "PyPortKafka - SimPortClass Init Called - {}".format(objectname))
        self.LogDebug("Python sys.path - {}".format(sys.path))
        self.producer = None
        return

    # time.perf_counter() measures elapsed (wall-clock) time, including sleeps; time.process_time() measures process CPU time only.
    def timeusstart(self):
        self.timestart = time.perf_counter()  #float fractions of a second

    def timeusstop(self):
        return int((time.perf_counter() - self.timestart) * 1000000)

    def minutetimermessage(self):
        DeltaSeconds = time.time() - self.StartTimeSeconds
        self.LogError(
            "PyPortKafka status. Event Queue {}, Messages Processed - {}, Messages/Second - {}"
            .format(
                self.EventQueueSize, self.MessageIndex,
                math.floor((self.MessageIndex - self.LastMessageIndex) /
                           DeltaSeconds)))
        self.StartTimeSeconds = time.time()
        self.LastMessageIndex = self.MessageIndex
        return

    def Config(self, MainJSON, OverrideJSON):
        """ The JSON values are passed as strings (stripped of comments), which we then load into a dictionary for processing
        Note that this does not handle Inherits JSON entries correctly (Inherits is effectily an Include file entry)"""
        #self.LogDebug("Passed Main JSON Config information - Len {} , {}".format(len(MainJSON),MainJSON))
        #self.LogDebug("Passed Override JSON Config information - Len {} , {}".format(len(OverrideJSON), OverrideJSON))

        # Load JSON into Dicts
        Override = {}
        try:
            if len(MainJSON) != 0:
                self.ConfigDict = json.loads(MainJSON)
            if len(OverrideJSON) != 0:
                Override = json.loads(OverrideJSON)
        except:
            self.LogError("Exception on parsing JSON Config data - {}".format(
                sys.exc_info()[0]))
            return

        self.LogDebug("JSON Config strings Parsed")

        # Now use the override config settings to adjust or add to the MainConfig. Only root json values can be adjusted.
        # So you cannot change a single value in a Binary point definition without rewriting the whole "Binaries" json key.
        self.ConfigDict.update(
            Override
        )  # Merges with Override doing just that - no recursion into sub dictionaries

        #self.LogTrace("Combined (Merged) JSON Config {}".format(json.dumps(self.ConfigDict)))

        # Now extract what is needed for this instance, or just reference the ConfigDict when needed.
        kafkaserver = self.ConfigDict["bootstrap.servers"]
        self.topic = self.ConfigDict["Topic"]

        # The acks can be 0, 1, 2 etc. or all. It is the number of nodes that have to have written the message before we get acknowledgement.
        # So a value of 0 is fire and forget, etc.
        # For details of what can be passed see: https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        # We require all of the nodes that are configured to ack for the data to be valid - we don't control it here.
        # In the cluster config it is set to 2.
        #
        # Really interesting discussion of how to lose messages in Kafka (but also how not to lose messages!):
        # https://jack-vanlightly.com/blog/2018/9/14/how-to-lose-messages-on-a-kafka-cluster-part1
        #
        # To control batching - the values are the defaults: 'batch.num.messages': 10000 OR 'message.max.bytes': 1000000.
        # queuing.strategy=fifo is an attempt to make sure messages arrive in the order they are sent.
        # max.in.flight.requests.per.connection=5 is usually a very large number; 5 does not seem to slow things down - maybe 1%?
        # request.required.acks is the number of copies of the message pushed to the other nodes. We really don't need any acks; if the primary has the message, it will replicate shortly. Or we could require an ack from at least 1 node.
        # 'compression.type': 'none' - does not seem to make much difference (none, snappy, gzip)
        # 'delivery.report.only.error': False    This does make things quicker, from approx 70k/sec to 74k/sec
        # 'message.send.max.retries': 100000 - will not drop the message until this many attempts have been made...
        conf = {
            'bootstrap.servers': kafkaserver,
            'client.id': 'OpenDataCon',
            'delivery.report.only.error': True,
            'message.send.max.retries': 10000000,
            'request.required.acks': 0,
            'max.in.flight.requests.per.connection': 100
        }
        self.producer = Producer(conf)
        return

    def Operational(self):
        """ This is called from ODC once ODC is ready for us to be fully operational - normally after Build is complete"""
        self.LogDebug("Port Operational - {}".format(
            datetime.now().isoformat(" ")))
        # This is only done once - will self restart from the timer callback.
        odc.SetTimer(self.guid, 1, 500)  # Start the timer cycle
        odc.SetTimer(self.guid, 2,
                     10000)  # First status message after 10 seconds
        return

    def Enable(self):
        self.LogDebug("Enabled - {}".format(datetime.now().isoformat(" ")))
        self.enabled = True
        return

    def Disable(self):
        self.LogDebug("Disabled - {}".format(datetime.now().isoformat(" ")))
        self.enabled = False
        return

    # Not used
    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """
        if err is not None:
            if self.SendErrorState == 0:
                self.LogError("Kafka Send Message Error - {} [{}] - {}".format(
                    msg.topic(), msg.partition(), err))
                self.SendErrorState = 1
        else:
            if self.SendErrorState == 1:
                self.LogError(
                    "Kafka Send Message Error Cleared - {} [{}]".format(
                        msg.topic(), msg.partition()))
                self.SendErrorState = 0

    # Needs to return True or False, which will be translated into CommandStatus::SUCCESS or CommandStatus::UNDEFINED
    # EventType (string) Index (int), Time (msSinceEpoch), Quality (string) Payload (string) Sender (string)
    # There is no callback available, the ODC code expects this method to return without delay.
    def EventHandler(self, EventType, Index, Time, Quality, Payload, Sender):
        # self.LogDebug("EventHander: {}, {}, {} {} - {}".format(self.guid,Sender,Index,EventType,Payload))

        if (EventType == "ConnectState"):
            return True

        self.LogError("Events must be queued {}".format(EventType))

        # Always return True - we processed the message - even if we could not pass it to Kafka.
        return True

    def millisdiff(self, starttimedate):
        dt = datetime.now() - starttimedate
        ms = (dt.days * 24 * 60 * 60 +
              dt.seconds) * 1000 + dt.microseconds / 1000.0
        return ms

    # Will be called at the appropriate time by the ASIO handler system. Will be passed an id for the timeout,
    # so you can have multiple timers running.
    def TimerHandler(self, TimerId):
        # self.LogDebug("TimerHander: ID {}, {}".format(TimerId, self.guid))

        if (self.producer is not None):
            self.producer.poll(0)  # Do any waiting processing, but don't wait!

        if (TimerId == 1):

            MaxMessageCount = 5000
            longwaitmsec = 100
            shortwaitmsec = 5
            EventCount = 1
            starttime = datetime.now()
            self.measuretimeus = 0
            self.measuretimeus2 = 0

            if (self.producer is not None):
                # Get Events from the queue and process them, up until we have an empty queue or MaxMessageCount entries
                # Then trigger the Kafka library to send them.

                while ((EventCount < MaxMessageCount)):
                    EventCount += 1

                    self.timeusstart()
                    ### Takes about 3.2usec (old 8.4usec) per call (approx) on DEV server
                    JsonEventstr, empty = odc.GetNextEvent(self.guid)
                    self.measuretimeus += self.timeusstop()

                    # The EventType will be an empty string if the queue is empty.
                    if (empty == True):
                        break

                    try:
                        self.timeusstart()
                        # Now 32msec/5000, about 5usec per record. (old 45msec/5000 so 9usec/record)
                        # Can we only get a single delivery report per block of up to 5000 messages?
                        # If we set the retry count to max int, then handling the delivery report does not make much sense - the buffer will just fill up and then we will get an exception
                        # here due to a full buffer. And we fill up the next buffer (in PyPort) (note we need to store the event we were about to send so we don't lose it!)
                        # Eventually we will lose events, but there is nothing we can do about that.
                        self.producer.produce(self.topic, value=JsonEventstr)
                        self.measuretimeus2 += self.timeusstop()
                        if self.QueueErrorState == 1:
                            self.LogError(
                                "Kafka Producer Queue Recovered - NOT full ({} messages awaiting delivery)"
                                .format(len(self.producer)))
                            self.QueueErrorState = 0

                    except BufferError:
                        if self.QueueErrorState == 0:
                            self.LogError(
                                "Kafka Producer Queue is full ({} messages awaiting delivery)"
                                .format(len(self.producer)))
                            self.QueueErrorState = 1
                        break

                    if (EventCount % 100 == 0):
                        self.producer.poll(
                            0)  # Do any waiting processing, but don't wait!

                self.EventQueueSize = odc.GetEventQueueSize(self.guid)
                #self.LogDebug("Kafka Produced {} messages. Kafka queue size {}. ODC Event queue size {} Execution time {} msec Timed code {}, {} us".format(EventCount,len(self.producer),self.EventQueueSize,self.millisdiff(starttime),self.measuretimeus,self.measuretimeus2))

                self.MessageIndex += EventCount

            # If we have pushed the maximum number of events in, we need to go faster...
            # If the producer queue hits the limit, this means the kafka cluster is not keeping up.
            if EventCount < MaxMessageCount:
                odc.SetTimer(
                    self.guid, 1,
                    longwaitmsec)  # We do not have messages waiting...
            else:
                odc.SetTimer(self.guid, 1,
                             shortwaitmsec)  # We do have messages waiting

        if (TimerId == 2):
            self.minutetimermessage()
            odc.SetTimer(self.guid, 2, 10000)  # Set to run again in 10 seconds

        if (self.producer is not None):
            self.producer.poll(0)  # Do any waiting processing, but don't wait!

        return

    # The Rest response interface - the following method will be called whenever the restful interface (a single interface for all PythonPorts) gets
    # called. It will be decoded sufficiently so that it is passed to the correct PythonPort (us)
    # To make these calls in Python (our test scripts) we can use the library below.
    # https://2.python-requests.org//en/master/
    #
    # We return the response that we want sent back to the caller. This will be a JSON string. A null string would be an error.
    def RestRequestHandler(self, url, content):
        self.LogDebug("RestRequestHander: {}".format(url))

        Response = {}  # Empty Dict
        if ("GET" in url):
            DeltaSeconds = time.time() - self.StartTimeSeconds
            Response[
                "Status"] = "PyPortKafka Processing is running. Messages Processed - {}, Messages/Second - {}".format(
                    self.MessageIndex,
                    math.floor((self.MessageIndex - self.LastMessageIndex) /
                               DeltaSeconds))

            self.StartTimeSeconds = time.time()
            self.LastMessageIndex = self.MessageIndex

        return json.dumps(Response)
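
# Illustrative only -- the GET status handler above could be exercised from a test
# script with the requests library; the host, port and route are assumptions, since
# the actual RESTful endpoint is configured by ODC/PyPort, not in this class:
#
#   import requests
#   resp = requests.get("http://localhost:10000/PyPortKafka")
#   print(resp.json()["Status"])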
Example 18
class CallReport:
    """
    If the CRM system sends two reports per call, this class handles the first one.
    FinalCallReport will handle the final report.
    """
    def __init__(self, group=None, consumer_topic=None, producer_topic=None):
        self._producer = Producer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": ""
        })
        self._group = group
        self._consumer_topic = consumer_topic
        self._consumer = None
        self._producer_topic = producer_topic

    @abstractmethod
    def start(self):
        """
        Example implementation for stream case
        """
        self._consumer = Consumer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": "",
            'group.id': self._group,
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest'
        })
        self._consumer.subscribe([self._consumer_topic])

        while True:
            msg = self._consumer.poll(0.1)

            if msg is None:
                continue

            elif not msg.error():  # Received message
                self._consumer.commit()
                self.send_call_report(msg.value())
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                logging.info('End of partition reached {}/{}'.format(
                    msg.topic(), msg.partition()))
            else:
                logging.error('Error occurred: {}'.format(msg.error().str()))

    @abstractmethod
    def manage_call_report(self, call_report):
        """
        this function will be overwritten per setup on the dialer.
        The goal is to reformat the call report into the json format for OptimalQ's post request
        :param call_report:
        :return: call post_call_report(sample_pool_id, call_report)
        """
        pass

    def send_call_report(self, report):
        """
        Sends call report to the SyncReport service via kafka
        :param report:
        :return:
        """
        try:
            report = json.dumps(report)
            self._producer.produce(self._producer_topic,
                                   report,
                                   callback=KafkaUtils.delivery_report)
            self._producer.poll(0)
            logging.info("Sent call report to SyncReport")
        except Exception as ex:
            logging.exception(
                'Exception while trying to send message {} to topic {} - {}'.
                format(report, self._producer_topic, ex))

    def terminate(self):
        self._producer.flush()
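# Hypothetical subclass sketch showing how manage_call_report() might be
# implemented for one dialer setup. The field names are assumptions, and the
# post_call_report() helper mentioned in the docstring is not shown in this
# excerpt, so the sketch reuses send_call_report() instead:
class ExampleDialerCallReport(CallReport):
    def manage_call_report(self, call_report):
        # Reformat the raw dialer payload into the JSON shape expected downstream.
        formatted = {
            "call_id": call_report.get("id"),          # assumed field name
            "disposition": call_report.get("status"),  # assumed field name
        }
        return self.send_call_report(formatted)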
Example 19
def main(args):
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        # Do not advance committed offsets outside of the transaction.
        # Consumer offsets are committed along with the transaction
        # using the producer's send_offsets_to_transaction() API.
        'enable.auto.commit': False,
        'enable.partition.eof': True,
    })

    # Prior to KIP-447 being supported each input partition requires
    # its own transactional producer, so in this example we use
    # assign() to a single partition rather than subscribe().
    # A more complex alternative is to dynamically create a producer per
    # partition in subscribe's rebalance callback.
    consumer.assign([TopicPartition(input_topic, input_partition)])

    producer = Producer({
        'bootstrap.servers': brokers,
        'transactional.id': 'eos-transactions.py'
    })

    # Initialize producer transaction.
    producer.init_transactions()
    # Start producer transaction.
    producer.begin_transaction()

    eof = {}
    msg_cnt = 0
    print("=== Starting Consume-Transform-Process loop ===")
    while True:
        # serve delivery reports from previous produce()s
        producer.poll(0)

        # read message from input_topic
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        topic, partition = msg.topic(), msg.partition()
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                eof[(topic, partition)] = True
                print("=== Reached the end of {} [{}] at {}====".format(
                    topic, partition, msg.offset()))

                if len(eof) == len(consumer.assignment()):
                    print("=== Reached end of input ===")
                    break
            continue
        # clear EOF if a new message has been received
        eof.pop((topic, partition), None)

        msg_cnt += 1

        # process message
        processed_key, processed_value = process_input(msg)

        # produce transformed message to output topic
        producer.produce(output_topic,
                         processed_value,
                         processed_key,
                         on_delivery=delivery_report)

        if msg_cnt % 100 == 0:
            print(
                "=== Committing transaction with {} messages at input offset {} ==="
                .format(msg_cnt, msg.offset()))
            # Send the consumer's position to transaction to commit
            # them along with the transaction, committing both
            # input and outputs in the same transaction is what provides EOS.
            producer.send_offsets_to_transaction(
                consumer.position(consumer.assignment()),
                consumer.consumer_group_metadata())

            # Commit the transaction
            producer.commit_transaction()

            # Begin new transaction
            producer.begin_transaction()
            msg_cnt = 0

    print("=== Committing final transaction with {} messages ===".format(
        msg_cnt))
    # commit processed message offsets to the transaction
    producer.send_offsets_to_transaction(
        consumer.position(consumer.assignment()),
        consumer.consumer_group_metadata())

    # commit transaction
    producer.commit_transaction()

    consumer.close()
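# The transactional example above relies on two helpers that are not shown in
# this excerpt. Minimal sketches, assuming the "transform" is simply
# upper-casing the message value:
def process_input(msg):
    """Transform an input message into an (output key, output value) pair."""
    value = msg.value()
    return msg.key(), value.upper() if value is not None else None


def delivery_report(err, msg):
    """Delivery callback for the transactional producer."""
    if err is not None:
        print("Delivery failed for {} [{}]: {}".format(msg.topic(), msg.partition(), err))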
Example 20
    print("Invalid name: " + file_name);
    exit(1)
capMsg = sumM3Message()
capMsg.topic="m3Capture"
capMsg.copy_m3name_values(m3n)
capMsg.fpath = file_name
captureMessage = capMsg.to_string()
print("Message: " + captureMessage)

# Send the message on the "m3Capture" topic
try:
    #define the producer configuration
    p = Producer({'bootstrap.servers': sys.argv[1]})
    print("Producer started")
    # Produce a message
    p.produce(capMsg.topic, captureMessage.encode(encoding='utf-8', errors='strict'), callback=captureAcked)
    print("Message produced")
    # Wait up to 1 second for events. Callbacks will be invoked during
    # this method call if the message is acknowledged.
    p.poll(1)
    print("Poll done")
except Exception:
    traceback.print_exc()
    print("Failed to produce the M3 summary message!")
    exit(1)

# flush and exit after the message is normally sent.
print("Flushing")
p.flush()
exit(0)
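# The script above passes a captureAcked delivery callback that is not defined
# in this excerpt. A minimal sketch of what it might look like:
def captureAcked(err, msg):
    if err is not None:
        print("M3 capture message delivery failed: {}".format(err))
    else:
        print("M3 capture message delivered to {} [{}] @ {}".format(
            msg.topic(), msg.partition(), msg.offset()))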
    def acked(err, msg):
        """Delivery report handler called on
        successful or failed delivery of message
        """
        global delivered_records
        if err is not None:
            print("Failed to deliver message: {}".format(err))
        else:
            delivered_records += 1
            print("Produced record to topic {} partition [{}] @ offset {}".
                  format(msg.topic(), msg.partition(), msg.offset()))

    f = open('2021-01-19.json', 'r')
    data = json.load(f)
    for n in range(1000):
        record_key = "alice"
        record_value = json.dumps(data[n])
        print("Producing record: {}\t{}".format(record_key, record_value))
        producer.produce(topic,
                         key=record_key,
                         value=record_value,
                         on_delivery=acked)
        # p.poll() serves delivery reports (on_delivery)
        # from previous produce() calls.
        producer.poll(0.25)

    producer.flush()

    print("{} messages were produced to topic {}!".format(
        delivered_records, topic))
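    # The loop above assumes `producer` and `topic` were created earlier in the
    # original script (not shown in this excerpt). A minimal sketch of that
    # setup; the broker address and topic name are placeholder assumptions:
    producer = Producer({'bootstrap.servers': 'localhost:9092'})
    topic = 'json-records'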
Example 22
class CoinbasePro(cbpro.WebsocketClient):
    def on_open(self):
        self.url = "wss://ws-feed.pro.coinbase.com/"  
        self.products = ["BTC-USD", "ETH-USD", "LTC-USD",
                         "BCH-USD" , 'ETH-BTC', 'LTC-BTC']  
        self.type = 'ticker'
        self.producer = Producer({
            'bootstrap.servers': ','.join(KAFKA_NODES),
            'default.topic.config': {
                'request.required.acks': 'all'
            }
        })
        print('Established Socket Connection')

    def on_message(self, msg):
        def delivery_report(err, k_msg):
            # Delivery report callback, triggered by poll() or flush()

            if err is not None:
                print(('Message delivery failed: {}'.format(err)))
            else:
                print(('Message delivered to {} [{}] - {}'.format(
                    k_msg.topic(), k_msg.partition(), msg['product_id'])))

        if 'time' in msg:  

            asset_pair = msg['product_id']

            data = {
                'bids': (msg['best_bid']),
                'len_bids': 1,
                'asks': (msg['best_ask']),
                'len_asks': 1,
                'product': asset_pair,
                'time': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+0000")
            }

            data['market'] = "Coinbase"
            message = json.dumps(data)

            # push to kafka topic
            topic = 'asks'
            self.producer.poll(0)
            self.producer.produce(
                topic,
                message.encode('utf-8'),
                key=asset_pair,
                callback=delivery_report)

    
    def _connect(self):
        if self.products is None:
            self.products = ["BTC-USD"]
        elif not isinstance(self.products, list):
            self.products = [self.products]

        if self.url[-1] == "/":
            self.url = self.url[:-1]

        self.ws = create_connection(self.url)

        self.stop = False

        if self.type == "heartbeat":
            sub_params = {
                'type': 'subscribe',
                "channels": [{
                    "name": "heartbeat",
                    "product_ids": self.products
                }]
            }
            self.ws.send(json.dumps(sub_params))
        elif self.type == 'ticker':
            sub_params = {
                'type': 'subscribe',
                "channels": [{
                    "name": "ticker",
                    "product_ids": self.products
                }]
            }
            self.ws.send(json.dumps(sub_params))
        else:
            sub_params = {'type': 'subscribe', 'product_ids': self.products}
            self.ws.send(json.dumps(sub_params))
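# Hypothetical driver for the CoinbasePro client above (a sketch). It assumes
# cbpro's WebsocketClient start()/close() lifecycle and that KAFKA_NODES is
# defined at module level, e.g. KAFKA_NODES = ['localhost:9092']:
import time

if __name__ == '__main__':
    ws_client = CoinbasePro()
    ws_client.start()                # opens the socket; on_message() begins producing to Kafka
    try:
        time.sleep(60)               # stream ticker data for a minute
    finally:
        ws_client.close()
        ws_client.producer.flush()   # drain any messages still queued in the producer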
Example 23
class ConfluentKafkaMsgQAPI:
    """
    This class provides API's into interact with Kafka Queue.
    """
    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
            pass
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        # if is_producer:
        #    self.__producer_connect()
        # if is_consumer:
        #    self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or \
                self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        :return:
        """
        is_connected = False
        if self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
                is_connected = True
            except:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))
        return is_connected

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False

        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one.."
            )
            if not self.__producer_connect():
                logging_to_console_and_syslog(
                    "Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = kafka_admin_client.create_topics(new_topics=[
                    admin.NewTopic(topic=self.topic, num_partitions=1)
                ],
                                                       operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() above, or flush() below, when the message has
        # been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename,
                        self.broker_name,
                        self.topic)
            logging_to_console_and_syslog(event)
            # Wait for any outstanding messages to be delivered and delivery report
            # callbacks to be triggered.
            # Serve delivery callback queue.
            # NOTE: Since produce() is an asynchronous API this poll() call
            #       will most likely not serve the delivery callback for the
            #       last produce()d message.
            self.producer_instance.poll(timeout=0.1)
            # Wait until all messages have been delivered
            # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
            self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        is_connected = False

        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
            self.consumer_conf = {'bootstrap.servers': self.broker_name,
                              'group.id': 'kafka-consumer',
                              'session.timeout.ms': 6000,
                              'auto.offset.reset': 'earliest'}
        """
        if self.consumer_instance is None:
            try:

                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                consumer_conf = {
                    'bootstrap.servers': self.broker_name,
                    'group.id': self.topic,
                    'session.timeout.ms': 6000,
                    'auto.offset.reset': 'earliest'
                }

                # consumer_conf['stats_cb'] = stats_cb
                # consumer_conf['statistics.interval.ms'] = 0
                self.consumer_instance = Consumer(consumer_conf)
                is_connected = True
            except:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))
        return is_connected

    @staticmethod
    def print_assignment(consumer, partitions):
        print('consumer = {}, Assignment {}:'.format(consumer, partitions))

    def dequeue(self):
        conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': self.topic,
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        if not self.consumer_instance:
            self.consumer_instance = Consumer(conf)
            self.consumer_instance.subscribe(
                [self.topic], on_assign=ConfluentKafkaMsgQAPI.print_assignment)
        msg = self.consumer_instance.poll(timeout=1.0)
        if msg is None or msg.error():
            return None
        else:
            logging_to_console_and_syslog(
                '%% %s [%d] at offset %d with key %s:\n' %
                (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            msg = msg.value().decode('utf8')
            logging_to_console_and_syslog("msg.value()={}".format(msg))
            self.consumer_instance.close()
            self.consumer_instance = None
            return msg

    def cleanup(self):
        if self.consumer_instance:
            self.consumer_instance.close()
            self.consumer_instance = None
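# Hypothetical round trip through ConfluentKafkaMsgQAPI (a sketch). It assumes
# the broker_name_key and topic_key environment variables are already set, as
# required by __read_environment_variables(), and the filename is a placeholder:
producer_api = ConfluentKafkaMsgQAPI(is_producer=True)
if producer_api.enqueue("/data/incoming/sample_file.bin"):
    logging_to_console_and_syslog(
        "Posted filename to topic {}".format(producer_api.topic))

consumer_api = ConfluentKafkaMsgQAPI(is_consumer=True, thread_identifier="worker-1")
filename = consumer_api.dequeue()    # returns None if nothing arrived within the poll timeout
if filename:
    logging_to_console_and_syslog("Dequeued filename {}".format(filename))
consumer_api.cleanup()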
Example 24
            # Generate some fake Employee data
            employee = Employee(id_='emp_id_' + fakeNumber,
                                first_name='fn_' + fakeNumber,
                                last_name='ln_' + fakeNumber,
                                dept_id='dept_id_' + str(i % 10),
                                hire_date=epoch_now_mills(),
                                wage=float(i),
                                sex=True)
            # Convert to a JSON string
            employeeJson = json.dumps(employee.__dict__)
            # Send the message
            producer.produce(topicName,
                             key=str(i),
                             value=employeeJson,
                             callback=delivery_callback)
            producer.poll(0)  # Call poll() so the client checks its internal buffer and triggers callbacks
            if i % 10000 == 0:
                print('Send {} messages'.format(i))
        time_spend = int(round(time.time() * 1000)) - time_start
        print('Send        : ' + str(msgCount) + ' messages to Kafka')
        print('Total spend : ' + str(time_spend) + ' millis-seconds')
        print('Throughput : ' + str(msgCount / time_spend * 1000) + ' msg/sec')
    except BufferError as e:
        # Error handling
        sys.stderr.write(
            '%% Local producer queue is full ({} messages awaiting delivery): try again\n'
            .format(len(producer)))
    except Exception as e:
        print(e)

    # Step 5. Make sure all messages in the buffer have been sent to Kafka
Example 25
class BMPNodes(object):
    def __init__(self, bootstrap_server=None, redishost=None):
        self.nodes = {}
        if redishost is None:
            raise ValueError("Redis Hostname not specified, bailing out")
        else:
            self.redis = redis.StrictRedis(host=redishost)
            self.redis.flushall()
            self.pubsub = self.redis.pubsub()

        self.routerevent = threading.Event()
        self.peerevent = threading.Event()
        self.threadList = []
        self.poisonpillq = Queue.Queue()
        self.peer_consumer = None
        self.router_consumer = None
        self.prefix_consumer = None
        self.rib_producer = None

        if bootstrap_server is not None:
            self.bootstrap_server = bootstrap_server

            for fn in [
                    self.capture_router_msg, self.capture_peer_msg,
                    self.capture_prefix_msg, self.redis_listener
            ]:
                thread = threading.Thread(target=fn, args=())
                self.threadList.append(thread)
                thread.daemon = True  # Daemonize thread
                thread.start()  # Start the execution
        else:
            raise ValueError("Bootstrap server not specified")

        self.dispatch = {'init': self.add_router, 'term': self.delete_router}

        self.redis_dispatch = {
            'AdjInRib': self.adjRibPolicyWorker,
            'AdjInRibPP': self.localRibWorker,
            'localRib': self.kafkaWorker
        }

    def get_nodes(self):
        nodeset = {}
        for node in self.nodes.keys():
            rtr = self.nodes[node]
            nodeset.update({str(rtr.name) + ':' + str(rtr.ipaddr): node})
            # Also provide the reverse mapping
            nodeset.update({node: str(rtr.name) + ':' + str(rtr.ipaddr)})
        return nodeset

    def serialize(self):
        nodeset = {}
        for node in self.nodes.keys():
            nodeset.update({node: self.nodes[node].serialize()})

        return nodeset

    class PoisonPillException(Exception):
        pass

    def consumer_cleanup(self):
        logger.debug("Cleaning up, exiting the active threads")
        for thread in self.threadList:
            self.poisonpillq.put("quit")

        # The redis listener will need the poisonpill channel publish
        self.redis.publish('poisonpill', "quit")

        for thread in self.threadList:
            logger.debug("Waiting for %s to finish..." % (thread.name))
            thread.join()
        return

    def process_msg(self, router_msg):
        # Ignore the first message (action = first)
        for msg in router_msg:
            if str(msg['action']) != 'first':
                self.dispatch[str(msg['action'])](msg)
            else:
                logger.debug("Ignoring action=first in openbmp router message")

    def add_router(self, router_msg):
        if str(router_msg['hash']) not in self.nodes:
            # Create the router object
            node = Node(node_hash=router_msg['hash'],
                        name=router_msg.pop('name'),
                        ipaddr=router_msg.pop('ip_address'),
                        data=router_msg)

            # Add to existing router set
            self.nodes.update({str(router_msg['hash']): node})

        else:
            logger.debug(
                "Received an add event for an existing peer. Strange, but ignore"
            )

    def delete_router(self, router_msg):
        if str(router_msg['hash']) in self.nodes:
            # Delete the particular router from the current router set
            del self.nodes[str(router_msg['hash'])]

            # Delete the router hash from redis
            self.redis.delete(str(router_msg['hash']))
        else:
            logger.debug(
                "Received a del event for a non-existent peer, ignore")

    def update_redis(self, channel=None):
        # Called to reflect latest state when new messages are received.
        nodes = {}
        if self.get_nodes():
            self.redis.hmset("routers", self.get_nodes())
            for node in self.nodes.keys():
                self.redis.hmset(node, self.nodes[node].serialize())

        if channel:
            # Publish message to redis Listeners
            self.redis.publish(
                channel, "Publish to " +
                str(self.redis_dispatch[channel].__name__) + " worker")

    def redis_listener(self):
        self.pubsub.subscribe(
            ['AdjInRib', 'AdjInRibPP', 'localRib', 'poisonpill'])
        pill = ''
        try:
            while True:
                for item in self.pubsub.listen():
                    logger.info("Received Redis event")
                    if item['data'] == "quit":
                        self.pubsub.unsubscribe()
                        logger.debug(
                            "unsubscribed and finished redis pubsub listener")
                        raise self.PoisonPillException
                    else:
                        if item['channel'] in self.redis_dispatch:
                            self.redis_dispatch[item['channel']]()

        except self.PoisonPillException:
            return

        except Exception as e:
            logger.debug("Error while listening to redis events")
            logger.debug("Error is" + str(e))
            return

    def adjRibPolicyWorker(self):
        logger.debug("Received an AdjInRib event")
        # walk through the nodes and apply available policies
        #nodes = {}
        if self.get_nodes():
            for node in self.nodes.keys():
                # process and apply policies
                self.nodes[node].adjInRibPP.process_adjInRib(node, self.redis)

        self.update_redis('AdjInRibPP')

    def localRibWorker(self):
        # walk through the nodes and apply available path selection algorithms
        #nodes = {}
        if self.get_nodes():
            for node in self.nodes.keys():
                # process and do path selection
                self.nodes[node].localRib.process_adjInRibPP(node, self.redis)

        self.update_redis('localRib')

    # Optional per-message delivery callback (triggered by poll() or flush())
    # during the rib stream to kafka when a message has been successfully delivered
    # or permanently failed delivery (after retries).

    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logger.debug('%% Message failed delivery: %s\n' % err)
        else:
            logger.debug('%% Message delivered to %s [%d]\n' %
                         (msg.topic(), msg.partition()))

    def kafkaWorker(self):
        # With the local Rib ready, push routes to Kafka. This is meant to
        # serve as a streaming set of routes to router clients which will be
        # kafka consumers. This is NOT a way to resync if the router dies or
        # router client disconnects - for that sync with the redis database
        # first and then start listening to fresh messages from Kafka for route events.

        self.rib_producer = Producer(
            {'bootstrap.servers': self.bootstrap_server})

        if self.get_nodes():
            for node in self.nodes.keys():

                topic = self.nodes[node].hash

                # fetch localRib routes from Redis, push to Kafka bus
                localRib = ast.literal_eval(self.redis.hget(node, 'localRib'))
                if localRib:
                    for route in localRib:
                        logger.debug(route)
                        #   self.shuttler.rtQueue.put(route)
                        try:
                            self.rib_producer.produce(
                                topic,
                                value=json.dumps(route),
                                callback=self.delivery_callback)
                            self.rib_producer.poll(0)
                        except BufferError as e:
                            logger.debug(
                                '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                                % len(self.rib_producer))
                            #  putting the poll() first to block until there is queue space available.
                            # This blocks for RIB_PRODUCER_WAIT_INTERVAL seconds because  message delivery can take some time
                            # if there are temporary errors on the broker (e.g., leader failover).
                            self.rib_producer.poll(RIB_PRODUCER_WAIT_INTERVAL)

                            # Now try again when there is hopefully some free space on the queue
                            self.rib_producer.produce(
                                topic,
                                value=json.dumps(route),
                                callback=self.delivery_callback)

                    # Wait until all messages have been delivered
                    logger.debug('%% Waiting for %d deliveries\n' %
                                 len(self.rib_producer))
                    self.rib_producer.flush()

    def capture_router_msg(self):
        pill = ''
        topics = ['openbmp.parsed.router']
        logger.debug("Connecting to Kafka to receive router messages")
        self.router_consumer = Consumer({
            'bootstrap.servers':
            self.bootstrap_server,
            'group.id':
            'bmp_client' + str(time.time()),
            'client.id':
            'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

        self.router_consumer.subscribe(topics)

        try:
            while True:
                msg = self.router_consumer.poll(timeout=1.0)

                try:
                    pill = self.poisonpillq.get_nowait()
                except Queue.Empty:
                    pass

                if isinstance(pill, str) and pill == "quit":
                    raise self.PoisonPillException

                if msg is None:
                    self.routerevent.set()
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logger.debug(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    # Process the  message
                    m = Message(msg.value())  # Gets body of kafka message.
                    t = msg.topic()  # Gets topic of kafka message.
                    m_tag = t.split('.')[2].upper()
                    t_stamp = str(datetime.datetime.now())

                    if t == "openbmp.parsed.router":
                        router = Router(m)
                        logger.debug('Received Message (' + t_stamp + ') : ' +
                                     m_tag + '(V: ' + str(m.version) + ')')
                        logger.debug(router.to_json_pretty())
                        router_msg = yaml.safe_load(router.to_json_pretty())
                        logger.debug("Calling process msg for Router messages")
                        bmpnodes.process_msg(router_msg)
                        # update redis
                        self.update_redis()
                        self.routerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the router message consumer")
            self.router_consumer.close()
            return

        except Exception as e:
            logger.debug(
                "Exception occurred while listening for router messages")
            logger.debug("Error is " + str(e))
            self.router_consumer.close()
            return

    def capture_peer_msg(self):

        pill = ''
        topics = ['openbmp.parsed.peer']
        logger.info("Connecting to Kafka to receive peer messages")
        self.peer_consumer = Consumer({
            'bootstrap.servers':
            self.bootstrap_server,
            'group.id':
            'bmp_client' + str(time.time()),
            'client.id':
            'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

        self.peer_consumer.subscribe(topics)

        try:
            while True:
                msg = self.peer_consumer.poll(timeout=1.0)

                try:
                    pill = self.poisonpillq.get_nowait()
                except Queue.Empty:
                    pass

                if isinstance(pill, str) and pill == "quit":
                    raise self.PoisonPillException

                if msg is None:
                    self.peerevent.set()
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logger.debug(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    # Process the  message
                    m = Message(msg.value())  # Gets body of kafka message.
                    t = msg.topic()  # Gets topic of kafka message.
                    m_tag = t.split('.')[2].upper()
                    t_stamp = str(datetime.datetime.now())

                    if t == "openbmp.parsed.peer":
                        peer = Peer(m)
                        logger.debug('Received Message (' + t_stamp + ') : ' +
                                     m_tag + '(V: ' + str(m.version) + ')')
                        logger.debug(peer.to_json_pretty())
                        peer_msg = yaml.safe_load(peer.to_json_pretty())
                        for msg in peer_msg:
                            processed = False
                            while not processed:
                                if str(msg['router_hash']) in self.nodes:
                                    self.nodes[str(
                                        msg['router_hash'])].process_msg(msg)
                                    processed = True
                                else:
                                    logger.debug(
                                        "Received peer message for currently unknown Router, hash="
                                        + str(msg['router_hash']))
                                    logger.debug(
                                        "Let's wait for router_msg event to be set"
                                    )
                                    self.routerevent.wait(
                                        PEER_MSG_DAMPENING_TIMER)

                        # Go ahead and update Redis
                        self.update_redis()
                        self.peerevent.clear()

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the peer message consumer")
            self.peer_consumer.close()
            return

        except Exception as e:
            logger.debug(
                "Exception occured while listening to peer messages from Kafka"
            )
            logger.debug("Error is " + str(e))
            self.router_consumer.close()
            return

    def capture_prefix_msg(self):
        pill = ''
        topics = ['openbmp.parsed.unicast_prefix']
        logger.debug("Connecting to Kafka to receive prefix messages")
        self.prefix_consumer = Consumer({
            'bootstrap.servers':
            self.bootstrap_server,
            'group.id':
            'bmp_client' + str(time.time()),
            'client.id':
            'bmp_client' + str(time.time()),
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
                'auto.commit.interval.ms': 1000,
                'enable.auto.commit': True
            }
        })

        self.prefix_consumer.subscribe(topics)

        try:
            while True:
                msg = self.prefix_consumer.poll(timeout=1.0)

                try:
                    pill = self.poisonpillq.get_nowait()
                except Queue.Empty:
                    pass

                if isinstance(pill, str) and pill == "quit":
                    raise self.PoisonPillException

                if msg is None:
                    continue
                if msg.error():
                    # Error or event
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        logger.debug(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        # Error
                        raise KafkaException(msg.error())
                else:
                    # Process the  message
                    m = Message(msg.value())  # Gets body of kafka message.
                    t = msg.topic()  # Gets topic of kafka message.
                    m_tag = t.split('.')[2].upper()
                    t_stamp = str(datetime.datetime.now())

                    if t == "openbmp.parsed.unicast_prefix":
                        unicast_prefix = UnicastPrefix(m)
                        logger.debug('Received Message (' + t_stamp + ') : ' +
                                     m_tag + '(V: ' + str(m.version) + ')')
                        logger.debug(unicast_prefix.to_json_pretty())
                        prefix_msg = yaml.safe_load(
                            unicast_prefix.to_json_pretty())

                        for msg in prefix_msg:
                            processed = False
                            while not processed:
                                if str(msg['router_hash']) in self.nodes:
                                    self.nodes[str(msg['router_hash']
                                                   )].adjInRib.process_msg(msg)
                                    processed = True
                                else:
                                    logger.debug(
                                        "Received peer message for currently unknown Router, hash="
                                        + str(msg['router_hash']))
                                    logger.debug(
                                        "Let's wait for router_msg event to be set"
                                    )
                                    self.peerevent.wait(
                                        PREFIX_MSG_DAMPENING_TIMER)

                        # Go ahead and update Redis
                        self.update_redis('AdjInRib')

        except self.PoisonPillException:
            logger.debug("Poison Pill received")
            logger.debug("Shutting down the prefix message consumer")
            self.prefix_consumer.close()
            return

        except Exception as e:
            logger.debug(
                "Exception occurred while listening for prefix messages")
            logger.debug("Error is " + str(e))
            self.prefix_consumer.close()
            return
def _produce(data, topic=_CONFIG['topics']['workspace_events']):
    print(f"Producing to: {_CONFIG['kafka_server']}")
    producer = Producer({'bootstrap.servers': _CONFIG['kafka_server']})
    producer.produce(topic, json.dumps(data), callback=_delivery_report)
    producer.poll(60)
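# The helper above assumes a module-level _CONFIG dict and a _delivery_report
# callback that are not shown in this excerpt. Minimal sketches (the server
# address and topic name are placeholder assumptions):
_CONFIG = {
    'kafka_server': 'localhost:9092',
    'topics': {'workspace_events': 'workspace-events'},
}


def _delivery_report(err, msg):
    if err is not None:
        print("Delivery failed: {}".format(err))
    else:
        print("Delivered to {} [{}] @ {}".format(msg.topic(), msg.partition(), msg.offset()))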
Example 27
class KafkaProducer(Producer[TPayload]):
    def __init__(self, configuration: Mapping[str, Any],
                 codec: Codec[KafkaPayload, TPayload]) -> None:
        self.__configuration = configuration
        self.__codec = codec

        self.__producer = ConfluentProducer(configuration)
        self.__shutdown_requested = Event()

        # The worker must execute in a separate thread to ensure that callbacks
        # are fired -- otherwise trying to produce "synchronously" via
        # ``produce(...).result()`` could result in a deadlock.
        self.__result = execute(self.__worker)

    def __worker(self) -> None:
        """
        Continuously polls the producer to ensure that delivery callbacks are
        triggered (which correspondingly set the result values on the
        ``Future`` instances returned by ``produce``.) This function exits
        after a shutdown request has been issued (via ``close``) and all
        in-flight messages have been delivered.
        """
        while not self.__shutdown_requested.is_set():
            self.__producer.poll(0.1)
        self.__producer.flush()

    def __delivery_callback(
        self,
        future: Future[Message[TPayload]],
        payload: TPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        if error is not None:
            future.set_exception(TransportError(error))
        else:
            try:
                timestamp_type, timestamp_value = message.timestamp()
                if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                    raise ValueError("timestamp not available")

                future.set_result(
                    Message(
                        Partition(Topic(message.topic()), message.partition()),
                        message.offset(),
                        payload,
                        datetime.utcfromtimestamp(timestamp_value / 1000.0),
                    ))
            except Exception as error:
                future.set_exception(error)

    def produce(self, destination: Union[Topic, Partition],
                payload: TPayload) -> Future[Message[TPayload]]:
        if self.__shutdown_requested.is_set():
            raise RuntimeError("producer has been closed")

        if isinstance(destination, Topic):
            produce = partial(self.__producer.produce, topic=destination.name)
        elif isinstance(destination, Partition):
            produce = partial(
                self.__producer.produce,
                topic=destination.topic.name,
                partition=destination.index,
            )
        else:
            raise TypeError("invalid destination type")

        encoded = self.__codec.encode(payload)

        future: Future[Message[TPayload]] = Future()
        future.set_running_or_notify_cancel()
        produce(
            value=encoded.value,
            key=encoded.key,
            headers=encoded.headers,
            on_delivery=partial(self.__delivery_callback, future, payload),
        )
        return future

    def close(self) -> Future[None]:
        self.__shutdown_requested.set()
        return self.__result
Example 28
    try:
        print('Start sending messages ...')

        time_start = int(round(time.time() * 1000))

        # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])

        # ** Demo: fire-and-forget **
        # In the produce() calls below we do not check the result of publishing,
        # so this approach has the highest throughput, but we never know whether a message succeeded or failed

        for i in range(0, msgCount):
            producer.produce(topicName, key=str(i), value='msg_'+str(i))

            producer.poll(0)  # <-- (important) Call poll() so the client checks its internal buffer

            if i%10000==0:
                print('Send {} messages'.format(i))

        time_spend = int(round(time.time() * 1000)) - time_start

        print('Send        : ' + str(msgCount) + ' messages to Kafka')
        print('Total spend : ' + str(time_spend) + ' millis-seconds')
        print('Throughput  : ' + str(msgCount/time_spend * 1000) + ' msg/sec')

    except BufferError as e:
        # Error handling
        sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(producer))
    except Exception as e:
        print(e)
    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    def delivery_callback(err, msg):
        if err:
            sys.stderr.write('%% Message failed delivery: %s\n' % err)
        else:
            sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' %
                             (msg.topic(), msg.partition(), msg.offset()))

    # Read lines from stdin, produce each line to Kafka
    for line in sys.stdin:
        try:
            # Produce line (without newline)
            p.produce(topic, line.rstrip(), callback=delivery_callback)

        except BufferError:
            sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' %
                             len(p))

        # Serve delivery callback queue.
        # NOTE: Since produce() is an asynchronous API this poll() call
        #       will most likely not serve the delivery callback for the
        #       last produce()d message.
        p.poll(0)

    # Wait until all messages have been delivered
    sys.stderr.write('%% Waiting for %d deliveries\n' % len(p))
    p.flush()
Example 30
class RiverApiProducer:
    def __init__(self, broker=None, callback_function=None):
        """
        Instantiate the class and create the producer object
        :param broker: 'host[:port]' string (or list of 'host[:port]' strings)
            that the producer should contact to bootstrap initial cluster metadata
        :param callback_function: fn taking 3 args (err, msg, obj) that is called
            after the event is produced and the delivery report is served.
            Default logs the error or success
        """
        self.broker = broker
        self.partition = 0
        self.callback_function = (callback_function
                                  if callback_function else self.callback_fn)

        # Create producer
        self.producer = Producer(self._generate_config())

    def _generate_config(self):
        """
        Generate configuration dictionary for the producer
        :return:
        """
        config = {"bootstrap.servers": self.broker, "session.timeout.ms": 6000}
        return config

    def produce_event(self, topic, event):
        """
        Produce event in the specified topic
        :param topic: str
        :param event: dict
        :return:
        """
        try:
            self.producer.produce(
                topic=topic,
                value=json.dumps(event, default=self.default_json_encoder),
                callback=lambda err, msg, obj=event: self.callback_function(
                    err, msg, obj),
            )
            self.producer.poll(1)  # Serve delivery callbacks
        except ValueError as error:
            logger.error(error)

    @staticmethod
    def default_json_encoder(o):
        """
        Json Encoder for datetime
        :return:
        """
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()

    @staticmethod
    def callback_fn(err, msg, obj):
        """
        Handle delivery reports served from producer.poll.
        This callback takes an extra argument, obj.
        This allows the original contents to be included for debugging purposes.
        """
        if err is not None:
            logger.debug(
                "Message {} delivery failed with error {} for topic {}".format(
                    obj, err, msg.topic()))
        else:
            logger.debug("Event Successfully created")
        'bootstrap.servers': "kafka:2181",
        'group.id': "json_producer"
    })
    time.sleep(10)

    def delivery_callback (err, msg):
        if err:
            sys.stderr.write('%% Message failed delivery: %s\n' % err)
        else:
            sys.stderr.write('%% Message delivered to %s [%d]\n' % \
                             (msg.topic(), msg.partition()))

    for tweet in get_tweet('examples/tweets-200k.txt.gz'):
        # if len(tweet['entities']['urls']) > 0 and \
        #         any(tweet['lang'] in l for l in ['es', 'en']):
        try:
            print("%s: %s" % (tweet['user']['screen_name'], tweet['text']))
            kfk.produce(
                "raw_tweets",
                json.dumps(tweet),
                callback=delivery_callback
            )
            kfk.poll(0)
            kfk.flush()
        except BufferError as e:
            sys.stderr.write('%% Local producer queue is full ' \
                             '(%d messages awaiting delivery): try again\n' %
                             len(kfk))


Example 32
from config import Config
from confluent_kafka import Producer


def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: {0}: {1}".format(
            msg.value(), err.str()))
    else:
        print("Message produced: {0}".format(msg.value()))  # binary


p = Producer({"bootstrap.servers": Config.MY_SERVER})

try:
    for val in range(1, 5):
        p.produce(Config.TOPIC_ID, "value #{0}".format(val), callback=acked)
        p.poll(0.5)

except KeyboardInterrupt:
    pass

p.flush(100)

# kafka-console-consumer --bootstrap-server localhost:9092 --topic first-topic
Example 33
    def test_ingester(self):

        print(f'{time_stamp()}: Setting up paths')
        path_kafka = pathlib.Path(config['path']['kafka'])

        path_logs = pathlib.Path(config['path']['logs'])
        if not path_logs.exists():
            path_logs.mkdir(parents=True, exist_ok=True)

        print(f'{time_stamp()}: Setting up test program in Fritz')
        program = Program(group_name="FRITZ_TEST", group_nickname="Fritz")

        # clean up old Kafka logs
        print(f'{time_stamp()}: Cleaning up Kafka logs')
        subprocess.run([
            'rm',
            '-rf',
            path_logs / "kafka-logs",
            "/tmp/zookeeper"
        ])

        print(f'{time_stamp()}: Starting up ZooKeeper at localhost:2181')

        # start ZooKeeper in the background
        cmd_zookeeper = [os.path.join(config['path']['kafka'], 'bin', 'zookeeper-server-start.sh'),
                         '-daemon',
                         os.path.join(config['path']['kafka'], 'config', 'zookeeper.properties')]

        with open(path_logs / 'zookeeper.stdout', 'w') as stdout_zookeeper:
            p_zookeeper = subprocess.run(cmd_zookeeper, stdout=stdout_zookeeper, stderr=subprocess.STDOUT)

        # take a nap while it fires up
        time.sleep(3)

        print(f'{time_stamp()}: Starting up Kafka Server at localhost:9092')

        # start the Kafka server:
        cmd_kafka_server = [os.path.join(config['path']['kafka'], 'bin', 'kafka-server-start.sh'),
                            '-daemon',
                            os.path.join(config['path']['kafka'], 'config', 'server.properties')]

        with open(os.path.join(config['path']['logs'], 'kafka_server.stdout'), 'w') as stdout_kafka_server:
            # p_kafka_server = subprocess.Popen(cmd_kafka_server, stdout=stdout_kafka_server, stderr=subprocess.STDOUT)
            p_kafka_server = subprocess.run(cmd_kafka_server)

        # take a nap while it fires up
        time.sleep(3)

        # get kafka topic names with kafka-topics command
        cmd_topics = [os.path.join(config['path']['kafka'], 'bin', 'kafka-topics.sh'),
                      '--zookeeper', config['kafka']['zookeeper.test'],
                      '-list']

        topics = subprocess.run(cmd_topics, stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')[:-1]
        print(f'{time_stamp()}: Found topics: {topics}')

        # create a test ZTF topic for the current UTC date
        date = datetime.datetime.utcnow().strftime("%Y%m%d")
        topic_name = f'ztf_{date}_programid1_test'

        if topic_name in topics:
            # topic previously created? remove first
            cmd_remove_topic = [os.path.join(config['path']['kafka'], 'bin', 'kafka-topics.sh'),
                                '--zookeeper', config['kafka']['zookeeper.test'],
                                '--delete', '--topic', topic_name]
            # print(kafka_cmd)
            remove_topic = subprocess.run(cmd_remove_topic,
                                          stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')[:-1]
            print(f'{time_stamp()}: {remove_topic}')
            print(f'{time_stamp()}: Removed topic: {topic_name}')
            time.sleep(1)

        if topic_name not in topics:
            print(f'{time_stamp()}: Creating topic {topic_name}')

            cmd_create_topic = [os.path.join(config['path']['kafka'], 'bin', 'kafka-topics.sh'),
                                "--create",
                                "--bootstrap-server", config['kafka']['bootstrap.test.servers'],
                                "--replication-factor", "1",
                                "--partitions", "1",
                                "--topic", topic_name]
            with open(os.path.join(config['path']['logs'], 'create_topic.stdout'), 'w') as stdout_create_topic:
                p_create_topic = subprocess.run(cmd_create_topic, stdout=stdout_create_topic, stderr=subprocess.STDOUT)

        print(f'{time_stamp()}: Starting up Kafka Producer')

        # spin up Kafka producer
        producer = Producer({'bootstrap.servers': config['kafka']['bootstrap.test.servers']})

        # small number of alerts that come with kowalski
        path_alerts = pathlib.Path('/app/data/ztf_alerts/20200202/')
        # grab some more alerts from gs://ztf-fritz/sample-public-alerts
        try:
            print(f'{time_stamp()}: Grabbing more alerts from gs://ztf-fritz/sample-public-alerts')
            r = requests.get('https://www.googleapis.com/storage/v1/b/ztf-fritz/o')
            aa = r.json()['items']
            ids = [pathlib.Path(a['id']).parent for a in aa if 'avro' in a['id']]
        except Exception as e:
            print(f'{time_stamp()}: Grabbing alerts from gs://ztf-fritz/sample-public-alerts failed; '
                  f'continuing with the local sample alerts only')
            print(f'{time_stamp()}: {e}')
            ids = []
        subprocess.run([
            "gsutil", "-m", "cp", "-n",
            "gs://ztf-fritz/sample-public-alerts/*.avro",
            "/app/data/ztf_alerts/20200202/"
        ])
        print(f'{time_stamp()}: Fetched {len(ids)} alerts from gs://ztf-fritz/sample-public-alerts')
        # push!
        for p in path_alerts.glob('*.avro'):
            with open(str(p), 'rb') as data:
                # Trigger any available delivery report callbacks from previous produce() calls
                producer.poll(0)

                print(f'{time_stamp()}: Pushing {p}')

                # Asynchronously produce a message, the delivery report callback
                # will be triggered from poll() above, or flush() below, when the message has
                # been successfully delivered or failed permanently.
                producer.produce(topic_name, data.read(), callback=delivery_report)

        # Wait for any outstanding messages to be delivered and delivery report
        # callbacks to be triggered.
        producer.flush()

        print(f'{time_stamp()}: Creating a test filter')
        test_filter = Filter(
            collection='ZTF_alerts',
            group_id=program.group_id,
            filter_id=program.filter_id
        )

        print(f'{time_stamp()}: Starting up Ingester')

        # digest and ingest
        ingester(obs_date=date, test=True)
        print(f'{time_stamp()}: Digested and ingested: all done!')

        # give everything a moment to settle before tearing down Kafka and ZooKeeper
        time.sleep(10)

        print(f'{time_stamp()}: Removing the test filter')
        test_filter.remove()

        print(f'{time_stamp()}: Shutting down Kafka Server at localhost:9092')
        # stop the Kafka server
        cmd_kafka_server_stop = [os.path.join(config['path']['kafka'], 'bin', 'kafka-server-stop.sh'),
                                 os.path.join(config['path']['kafka'], 'config', 'server.properties')]

        with open(os.path.join(config['path']['logs'], 'kafka_server.stdout'), 'w') as stdout_kafka_server:
            p_kafka_server_stop = subprocess.run(cmd_kafka_server_stop,
                                                 stdout=stdout_kafka_server, stderr=subprocess.STDOUT)

        print(f'{time_stamp()}: Shutting down ZooKeeper at localhost:2181')

        # stop ZooKeeper
        cmd_zookeeper_stop = [os.path.join(config['path']['kafka'], 'bin', 'zookeeper-server-stop.sh'),
                              os.path.join(config['path']['kafka'], 'config', 'zookeeper.properties')]

        with open(os.path.join(config['path']['logs'], 'zookeeper.stdout'), 'w') as stdout_zookeeper:
            p_zookeeper_stop = subprocess.run(cmd_zookeeper_stop, stdout=stdout_zookeeper, stderr=subprocess.STDOUT)
Example 34
def generate(config, asset_0, asset_1, interval_ms, inject_error, devmode, destination):
    """generate data and send it to a Kafka broker"""

    interval_secs = interval_ms / 1000.0
    random.seed()

    if not devmode:
        if destination == "kafka":
            # prepare the Kafka connection
            kafka_config = config.get("kafka", {})
            brokers = kafka_config.get("brokers", "localhost:9092")
            topic = kafka_config.get("topic", "simulator")
            kafkaconf = {'bootstrap.servers': brokers, 'client.id': socket.gethostname()}
            producer = Producer(kafkaconf)
        elif destination == "file":
            file_config = config.get("file", {})
            filepath = file_config.get("filepath", "output.json")
            destination_file = open(filepath, 'w+')

    # extract asset and dimension details
    asset_0_label = asset_0.get("label", "asset_0")
    asset_0_nb_assets = asset_0.get("assets", 3)
    asset_0_nb_dimensions = asset_0.get("dimensions", 3)
    asset_0_dimensions_labels = asset_0.get("dimension_labels", {})
    asset_0_dimensions_types = asset_0.get("dimension_types", {})
    asset_0_dimensions_values = asset_0.get("dimension_values", {})
    asset_1_label = asset_1.get("label", "asset_1")
    asset_1_nb_assets = asset_1.get("assets", 3)
    asset_1_nb_dimensions = asset_1.get("dimensions", 3)
    asset_1_dimensions_labels = asset_1.get("dimension_labels", {})
    asset_1_dimensions_types = asset_1.get("dimension_types", {})
    asset_1_dimensions_values = asset_1.get("dimension_values", {})
    asset_1_nb_metrics = asset_1.get("metrics", 3)
    asset_1_metrics_values = asset_1.get("metrics_values")
    asset_1_metrics_labels = asset_1.get("metrics_labels")

    while True:
        data = {
            "timestamp": int(time.time()*1000000)
        }

        for a0 in range(asset_0_nb_assets):

            #GENERIC: generate asset_0 IDs
            data[asset_0_label+"_id"] = asset_0_label+"_" + str(a0)

            #GENERIC: generate asset_0 dimensions
            for key in range(asset_0_nb_dimensions):
                values = asset_0_dimensions_values.get("d_" + str(key))
                labels = asset_0_dimensions_labels.get("d_" + str(key))
                types = asset_0_dimensions_types.get("d_" + str(key))
                if types == "fixed":
                    data[labels] = values[a0]
                else:
                    if types == "high_cardinality":
                        data[labels] = labels + "_" + str(random.randint(0, values + 1))
                    else: 
                        if types == "random":
                            data[labels] = random.choice(values)

            for a1 in range(asset_1_nb_assets):
                #GENERIC: generate asset_1 IDs
                data[asset_1_label+"_id"] = asset_1_label+"_" + str(a0)+"_"+str(a1)

                #GENERIC: generate asset_1 dimensions
                for key in range(asset_1_nb_dimensions):
                    values = asset_1_dimensions_values.get("d_" + str(key))
                    labels = asset_1_dimensions_labels.get("d_" + str(key))
                    types = asset_1_dimensions_types.get("d_" + str(key))
                    if types == "fixed":
                        data[labels] = values[a1]
                    else:
                        if types == "high_cardinality":
                            data[labels] = labels + "_" + str(random.randint(0, values + 1))
                        else: 
                            if types == "random":
                                data[labels] = random.choice(values)

                #GENERIC: generate metrics
                for key in range(asset_1_nb_metrics):
                    min_val, max_val = asset_1_metrics_values.get("m_" + str(key))
                    label = asset_1_metrics_labels.get("m_" + str(key))
                    data[label] = random.randint(min_val, max_val)
              
                # Custom: implement your abnormal behavior here ->
                if inject_error == 'true':
                    data["discount"] = random.randint(20, 25)
                    data["quantity"] = random.randint(1, 99)
                # -> end of abnormal behavior

                #GENERIC: publish the data
                if devmode:
                    print(json.dumps(data, indent=4), flush=True)
                elif destination == "kafka":
                    producer.produce(topic, key=data[asset_0_label + "_id"], value=json.dumps(data))
                    producer.poll(0)
                elif destination == "file":
                    destination_file.write(json.dumps(data) + '\n')

        time.sleep(interval_secs)
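The dimension `types` handled above (fixed, high_cardinality, random) are read from the `asset_0` / `asset_1` dicts keyed by `d_<n>` and `m_<n>`. A hypothetical example of the shape such a dict is expected to take; all labels and values below are made up for illustration and are not a configuration from the original project:

asset_1_example = {
    "label": "product",
    "assets": 2,                       # asset_1 instances generated per asset_0
    "dimensions": 2,
    "dimension_labels": {"d_0": "store", "d_1": "category"},
    "dimension_types": {"d_0": "high_cardinality", "d_1": "random"},
    # high_cardinality -> upper bound for the random suffix; random -> list to pick from
    "dimension_values": {"d_0": 1000, "d_1": ["food", "toys", "books"]},
    "metrics": 1,
    "metrics_labels": {"m_0": "quantity"},
    "metrics_values": {"m_0": [1, 99]},  # [min, max] passed to random.randint
}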
Example 35
class ProducerServer:
    def __init__(self, input_file: str, topic_name: str, **kwargs):
        self.p = Producer({
            'bootstrap.servers': kwargs['bootstrap_servers'],
            'client.id': kwargs['client_id']
        })
        self.input_file = input_file
        self.topic_name = topic_name
        self.client = AdminClient(
            {'bootstrap.servers': kwargs['bootstrap_servers']})

    def check_topic_exists(self) -> bool:
        """Checks if the given topic exists in Kafka."""
        topic_metadata = self.client.list_topics(timeout=5)
        return self.topic_name in set(
            t.topic for t in topic_metadata.topics.values())

    def create_topics(self):
        """Create kafka topic."""
        new_topics = [
            NewTopic(self.topic_name, num_partitions=3, replication_factor=1)
        ]
        result = self.client.create_topics(new_topics)
        for topic, f in result.items():
            try:
                f.result()  # The result itself is None
                logger.debug(f"Topic {topic} created")
            except Exception as e:
                logger.error(f"Failed to create topic {topic}: {e}")

    def delivery_report(self, err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            logger.debug('Message delivery failed: {}'.format(err))
        else:
            logger.debug('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))  # noqa

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return str(round(time.time() * 1000))

    def produce(self, message):
        """Produce record to kafka."""
        logger.debug(f'message: {message}')
        while True:
            try:
                self.p.produce(self.topic_name,
                               key=self.time_millis(),
                               value=self.dict_to_binary(message),
                               on_delivery=self.delivery_report)
                self.p.poll(0)
                break
            except BufferError as e:
                logger.error(e)
                self.p.poll(1)

    # TODO we're generating dummy data
    def generate_data(self):
        # check whether the topic exists; create it if not
        if not self.check_topic_exists():
            self.create_topics()
        else:
            logger.debug(f'topic {self.topic_name} already exists!')
        # read data from file
        with open(self.input_file, 'r') as f:
            messages = json.loads(f.read())
        # TODO send the correct data
        try:
            for message in messages:
                self.produce(message)
        except KeyboardInterrupt:
            logger.debug('aborted by user.')
        finally:
            self.p.flush()

    # TODO fill this in to return the json dictionary to binary
    def dict_to_binary(self, json_dict):
        """Json dictionary to binary."""
        return json.dumps(json_dict)
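A minimal usage sketch for `ProducerServer`; the broker address, client id, file name and topic below are placeholders, not values taken from the original project:

if __name__ == "__main__":
    # placeholder settings for illustration only
    server = ProducerServer(
        input_file="sample_events.json",      # a JSON array of event dicts
        topic_name="example.events",
        bootstrap_servers="localhost:9092",
        client_id="producer-server-example",
    )
    server.generate_data()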
Example 36
from confluent_kafka import Producer
import requests

# producer configuration, as a dict
p = Producer({
    'bootstrap.servers':
    '192.168.1.88:19092,192.168.1.88:29092,192.168.1.88:39092'
})


# delivery report callback
def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


# send 30 messages
for data in range(30):
    p.produce('test', str(data), partition=1, callback=delivery_report)

p.poll(10)  # wait up to 10 seconds for delivery report callbacks
p.flush()
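Producing to partition=1 as above only works if the topic 'test' actually has at least two partitions; otherwise the delivery report will flag the message as failed. A sketch that pre-creates the topic accordingly, reusing the AdminClient pattern from the previous example (the partition and replication settings here are assumptions):

from confluent_kafka.admin import AdminClient, NewTopic

admin = AdminClient({
    'bootstrap.servers':
    '192.168.1.88:19092,192.168.1.88:29092,192.168.1.88:39092'
})
futures = admin.create_topics([NewTopic('test', num_partitions=2, replication_factor=1)])
for topic, future in futures.items():
    try:
        future.result()  # returns None on success
        print('Topic {} created'.format(topic))
    except Exception as e:
        print('Failed to create topic {}: {}'.format(topic, e))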
Example 37
class KafkaDestination(object):
    """ syslog-ng Apache Kafka destination.
    """

    _kafka_producer = None

    _conf = dict()

    def __init__(self):
        self.hosts = None
        self.topic = None
        self.msg_key = None
        self.partition = None
        self.programs = None
        self.group_id = None
        self.broker_version = None
        self.verbose = False
        self.display_stats = False
        self.producer_config = None

    def init(self, args):
        """ This method is called at initialization time.

        Should return False if initialization fails.
        """

        if 'producer_config' in args:
            try:
                self.producer_config = ast.literal_eval(args['producer_config'])
                self._conf.update(self.producer_config)
            except ValueError:
                LOG.error("Given config %s is not in a Python dict format."
                          % args['producer_config'])

        try:
            self.hosts = args['hosts']
            self.topic = args['topic']
            self._conf['bootstrap.servers'] = self.hosts
        except KeyError:
            LOG.error("Missing `hosts` or `topic` option...")
            return False

        if 'msg_key' in args:
            self.msg_key = args['msg_key']
            LOG.info("Message key used will be %s" % self.msg_key)

        if 'partition' in args:
            self.partition = args['partition']
            LOG.info("Partition to produce to %s" % self.partition)

        # optional `programs` parameter to filter out messages
        if 'programs' in args:
            self.programs = parse_str_list(args['programs'])
            LOG.info("Programs to filter against %s" % self.programs)

        if 'group_id' in args:
            self.group_id = args['group_id']
            self._conf['group.id'] = self.group_id
            LOG.info("Broker group_id=%s" % self.group_id)

        if 'broker_version' in args:
            self.broker_version = args['broker_version']
            if '.'.join(self.broker_version.split('.')[:2]) in ('0.10', '0.11'):
                self._conf['api.version.request'] = True
            else:
                self._conf['broker.version.fallback'] = self.broker_version
                self._conf['api.version.request'] = False
            LOG.info("Broker version=%s" % self.broker_version)
        else:
            self.broker_version = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf[
                'broker.version.fallback'] = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['api.version.request'] = False
            LOG.warn("Default broker version fallback %s "
                     "will be applied here." % DEFAULT_BROKER_VERSION_FALLBACK)

        # provide a global `on_delivery` callback in the `Producer()` config
        # dict: better for memory consumption than a per-message callback.
        self._conf['on_delivery'] = delivery_callback
        if 'verbose' in args:
            self.verbose = ast.literal_eval(args['verbose'])
        if not self.verbose:
            # only interested in delivery failures here. We do provide a
            # global on_delivery callback in the Producer() config dict and
            # also set delivery.report.only.error.
            self._conf['delivery.report.only.error'] = True
            LOG.info("Verbose mode is OFF: you will not be able to see "
                     "messages in here. Failures only. Use 'verbose=('True')' "
                     "in your destination options to see successfully "
                     "processed messages in your logs.")

        # display broker stats?
        if 'display_stats' in args:
            self.display_stats = ast.literal_eval(args['display_stats'])
        if self.display_stats:
            self._conf['stats_cb'] = stats_callback
            LOG.info("Broker statistics will be displayed.")

        LOG.info(
            "Initialization of Kafka Python driver w/ args=%s" % self._conf)
        return True

    def open(self):
        """ Open a connection to the Kafka service.

        Should return False if initialization fails.
        """
        LOG.info("Opening connection to the remote Kafka services at %s"
                 % self.hosts)
        self._kafka_producer = Producer(**self._conf)
        return True

    def is_opened(self):
        """ Check if the connection to Kafka is able to receive messages.

        Should return False if target is not open.
        """
        return self._kafka_producer is not None

    def close(self):
        """ Close the connection to the Kafka service.
        """
        LOG.debug("KafkaDestination.close()....")
        if self._kafka_producer is not None:
            LOG.debug("Flushing producer w/ a timeout of 30 seconds...")
            self._kafka_producer.flush(30)
        return True

    # noinspection PyMethodMayBeStatic
    def deinit(self):
        """ This method is called at deinitialization time.
        """
        LOG.debug("KafkaDestination.deinit()....")
        if self._kafka_producer:
            self._kafka_producer = None
        return True

    def send(self, ro_msg):
        """ Send a message to the target service

        It should return True to indicate success, False will suspend the
        destination for a period specified by the time-reopen() option.

        :return: True or False
        """

        # do nothing if msg is empty
        if not ro_msg:
            return True

        # no syslog-ng `value-pairs` here: we are dealing with a `LogMessage`
        if not isinstance(ro_msg, dict):
            # syslog-ng `LogMessage` is read-only
            # the goal is RFC 5424; we cannot use value-pairs because of memory leaks
            try:
                msg = {'FACILITY': ro_msg.FACILITY, 'PRIORITY': ro_msg.PRIORITY,
                       'HOST': ro_msg.HOST, 'PROGRAM': ro_msg.PROGRAM,
                       'DATE': ro_msg.DATE, 'MESSAGE': ro_msg.MESSAGE}
            except AttributeError:
                LOG.error("Your version of syslog-ng is not supported. "
                          "Please use syslog-ng 3.7.x")
                return False
        else:
            LOG.warn("You are using `values-pair` if you are using "
                     "syslog-ng <= 3.11 it is known to be leaking...")
            msg = ro_msg
        try:

            # check if we do have a program filter defined.
            msg_program = msg['PROGRAM']
            if self.programs is not None:
                if msg_program not in self.programs:
                    # notify of success
                    return True
            if msg_program == 'firewall':
                firewall_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_firewall_msg(firewall_msg)
            elif msg_program == 'nat':
                nat_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_nat_msg(nat_msg)
            # convert date string to UNIX timestamp
            msg_date = msg['DATE']
            if msg_date is not None:
                msg['DATE'] = date_str_to_timestamp(msg_date)

            msg_string = str(msg)

            kwargs = {}
            if self.msg_key and self.msg_key in msg.keys():
                kwargs['key'] = msg[self.msg_key]
            if self.partition:
                try:
                    kwargs['partition'] = int(self.partition)
                except ValueError:
                    LOG.warning(
                        "Ignoring partition=%s because it is not an int."
                        % self.partition)

            self._kafka_producer.produce(self.topic, msg_string, **kwargs)

            # `poll()` doesn't do any sleeping at all if you give it 0, all
            # it does is grab a mutex, check a queue, and release the mutex.
            # It is okay to call poll(0) after each produce call, the
            # performance impact is negligible, if any.
            self._kafka_producer.poll(0)
        except BufferError:
            LOG.error("Producer queue is full. This message will be discarded. "
                      "%d messages waiting to be delivered.",
                      len(self._kafka_producer))
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            sleep(5)
            return True
        except (KafkaException, UnicodeEncodeError) as e:
            LOG.error("An error occurred while trying to send messages...   "
                      "See details: %s" % e, exc_info=True)
            sleep(5)
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            return True

        return True
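`init()` above registers a global `delivery_callback` in the producer configuration (and optionally a `stats_callback`), but neither function appears in this excerpt. A minimal sketch of what they might look like, assuming the module-level `LOG` logger used throughout; the log wording is an assumption, not the project's code:

def delivery_callback(err, msg):
    """ Global `on_delivery` handler set in the Producer() config; with
        delivery.report.only.error=True it fires only on failures. """
    if err is not None:
        LOG.error("Message delivery failed: %s" % err)
    else:
        LOG.debug("Message delivered to %s [%d]" % (msg.topic(), msg.partition()))


def stats_callback(stats_json_str):
    """ Periodic broker statistics handler; librdkafka only emits statistics
        when statistics.interval.ms is set in the producer configuration. """
    LOG.info("Broker statistics: %s" % stats_json_str)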