Example no. 1
 def __init__(self, conf):
     self.producer = Producer(conf)
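
This example shows only the constructor; a minimal sketch of how such a thin wrapper is commonly fleshed out follows (the class name, method names, and send/close behaviour are assumptions, not part of the original):

from confluent_kafka import Producer


class ProducerWrapper:
    """Hedged sketch only -- names and methods are assumptions."""

    def __init__(self, conf):
        self.producer = Producer(conf)

    def send(self, topic, value, key=None):
        # enqueue the message and serve any pending delivery callbacks
        self.producer.produce(topic, value=value, key=key)
        self.producer.poll(0)

    def close(self):
        # block until every queued message is delivered or fails
        self.producer.flush()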
Example no. 2
import os

from config import Config
from confluent_kafka import Producer
from slack import WebClient
from slack.errors import SlackApiError

# Bot User OAuth Access Token
# used scopes: channels:history, channels:read, chat:write, im:history, mpim:history, users:read
token = os.environ["SLACK_BOT_TOKEN"]

# Initialize the Slack API client
sc = WebClient(token)

# Create the Kafka producer (bootstrap server, e.g. "localhost:9092")
settings = {"bootstrap.servers": Config.MY_SERVER}
p = Producer(settings)


def acked(err, msg):  # callback
    if err is not None:
        print("Failed to deliver message: {0}: {1}".format(
            msg.value(), err.str()))
    else:
        print("Message produced: {0}".format(msg.value()))  # binary


channel = "C01FVD0QD42"  # look up the channel id with sc.conversations_list below

# channel_name = "일반"
# try:
#     sc_response = sc.conversations_list(channel=channel)
Example no. 3
 def initProducer(self):
     if "kafka_producer" in self.config:
         self.kafkaProducer = Producer(self.config["kafka_producer"])
     else:
         raise Exception("Cannot find Kafka Producer configuration.")
Example no. 4
    def __init__(
        self,
        storage_key: StorageKey,
        raw_topic: Optional[str],
        replacements_topic: Optional[str],
        max_batch_size: int,
        max_batch_time_ms: int,
        bootstrap_servers: Sequence[str],
        group_id: str,
        commit_log_topic: Optional[str],
        auto_offset_reset: str,
        queued_max_messages_kbytes: int,
        queued_min_messages: int,
        processes: Optional[int],
        input_block_size: Optional[int],
        output_block_size: Optional[int],
        commit_retry_policy: Optional[RetryPolicy] = None,
        profile_path: Optional[str] = None,
    ) -> None:
        self.storage = get_writable_storage(storage_key)
        self.bootstrap_servers = bootstrap_servers

        stream_loader = self.storage.get_table_writer().get_stream_loader()

        self.raw_topic: Topic
        if raw_topic is not None:
            self.raw_topic = Topic(raw_topic)
        else:
            self.raw_topic = Topic(
                stream_loader.get_default_topic_spec().topic_name)

        self.replacements_topic: Optional[Topic]
        if replacements_topic is not None:
            self.replacements_topic = Topic(replacements_topic)
        else:
            replacement_topic_spec = stream_loader.get_replacement_topic_spec()
            if replacement_topic_spec is not None:
                self.replacements_topic = Topic(
                    replacement_topic_spec.topic_name)
            else:
                self.replacements_topic = None

        self.commit_log_topic: Optional[Topic]
        if commit_log_topic is not None:
            self.commit_log_topic = Topic(commit_log_topic)
        else:
            commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
            if commit_log_topic_spec is not None:
                self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
            else:
                self.commit_log_topic = None

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer({
            "bootstrap.servers": ",".join(self.bootstrap_servers),
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        })

        self.metrics = MetricsWrapper(
            environment.metrics,
            "consumer",
            tags={
                "group": group_id,
                "storage": storage_key.value
            },
        )

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages
        self.processes = processes
        self.input_block_size = input_block_size
        self.output_block_size = output_block_size
        self.__profile_path = profile_path

        if commit_retry_policy is None:
            commit_retry_policy = BasicRetryPolicy(
                3,
                constant_delay(1),
                lambda e: isinstance(e, KafkaException) and e.args[0].code() in
                (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR,
                    KafkaError._WAIT_COORD,
                ),
            )

        self.__commit_retry_policy = commit_retry_policy
Example no. 5
from confluent_kafka import Producer
from faker import Faker
import json

p = Producer({
    'bootstrap.servers':
    'b-2.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092,b-1.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092,b-3.xxxx.xxxx.xxxx.kafka.us-east-1.amazonaws.com:9092'
})


def delivery_report(err, msg):

    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


fake = Faker('en_US')


def gen_ran_data(i):
    data = {}
    data["ID"] = i
    data["name"] = fake.name()
    data["address"] = fake.address()
    data["Email-ID"] = fake.safe_email()
    return data
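
The snippet is cut off before any message is sent; a minimal sketch of how gen_ran_data and delivery_report would typically be wired to the producer follows (the topic name 'fake-users' and the record count are assumptions):

# Hedged continuation sketch -- topic name and count are assumptions.
if __name__ == '__main__':
    for i in range(10):
        record = gen_ran_data(i)
        p.poll(0)  # serve delivery callbacks from earlier produce() calls
        p.produce('fake-users',
                  json.dumps(record).encode('utf-8'),
                  callback=delivery_report)
    p.flush()  # wait for all outstanding deliveries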

Example no. 6
 def __init__(self, conf, topic_name):
     self.topic_name = topic_name
     self.producer = Producer(conf)
     self.counter = 0
     self.running = True
Example no. 7
                            days=1)
                    if update_order_date_modified > orders_dict_date_modified and update_order not in r_new:
                        # Check if > condition is enough (it was => before)
                        dict_update_orders[update_order_id] = {
                            "order_id": update_order_id,
                            "date_created": update_order['date_created_gmt'],
                            "date_modified": update_order['date_modified_gmt']
                        }
    orders = dict_new_orders.copy()
    orders.update(dict_update_orders)
    return orders, date_created


if __name__ == '__main__':
    create_tables()
    p = Producer({'bootstrap.servers': 'localhost:9092,localhost:9093'})
    date_created, date_updated = get_last_updated_at()
    sleep_time = 3
    loop_value = 0
    try:
        while True:
            orders_dict = []
            loop_value += 1
            if loop_value >= 5:
                # Load orders only every 5th iteration
                loop_value = -1
                orders_dict = get_orders_dict()
            #print('Sleeping for {0} seconds...'.format(sleep_time))
            time.sleep(sleep_time)
            orders, date_created = get_woocommerce_orders(
                date_created, date_updated, orders_dict)
Example no. 8
    def run(self):
        def fail_fast(err, msg):
            if err is not None:
                print("Kafka producer delivery error: {}".format(err))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)

        def on_commit(err, partitions):
            if err is not None:
                print("Kafka consumer commit error: {}".format(err))
                print("Bailing out...")
                # TODO: should it be sys.exit(-1)?
                raise KafkaException(err)
            for p in partitions:
                # check for partition-specific commit errors
                print(p)
                if p.error:
                    print("Kafka consumer commit error: {}".format(p.error))
                    print("Bailing out...")
                    # TODO: should it be sys.exit(-1)?
                    raise KafkaException(p.error)
            print("Kafka consumer commit successful")
            pass

        def on_rebalance(consumer, partitions):
            for p in partitions:
                if p.error:
                    raise KafkaException(p.error)
            print("Kafka partitions rebalanced: {} / {}".format(
                consumer, partitions))

        consumer_conf = self.kafka_config.copy()
        consumer_conf.update({
            'group.id': self.consumer_group,
            'on_commit': on_commit,
            # messages don't have offset marked as stored until pushed to
            # elastic, but we do auto-commit stored offsets to broker
            'enable.auto.commit': True,
            'enable.auto.offset.store': False,
            # user code timeout; if no poll after this long, assume user code
            # hung and rebalance (default: 5min)
            'max.poll.interval.ms': 180000,
            'default.topic.config': {
                'auto.offset.reset': 'latest',
            },
        })
        consumer = Consumer(consumer_conf)

        producer_conf = self.kafka_config.copy()
        producer_conf.update({
            'delivery.report.only.error': True,
            'default.topic.config': {
                'request.required.acks': -1,  # all brokers must confirm
            },
        })
        producer = Producer(producer_conf)

        consumer.subscribe(
            [self.consume_topic],
            on_assign=on_rebalance,
            on_revoke=on_rebalance,
        )
        print("Kafka consuming {}".format(self.consume_topic))

        while True:
            msg = consumer.poll(self.poll_interval)
            if not msg:
                print("nothing new from kafka (poll_interval: {} sec)".format(
                    self.poll_interval))
                continue
            if msg.error():
                raise KafkaException(msg.error())

            cle = json.loads(msg.value().decode('utf-8'))
            #print(cle)
            print("processing changelog index {}".format(cle['index']))
            release_ids = []
            new_release_ids = []
            file_ids = []
            fileset_ids = []
            webcapture_ids = []
            container_ids = []
            work_ids = []
            release_edits = cle['editgroup']['edits']['releases']
            for re in release_edits:
                release_ids.append(re['ident'])
                # filter to direct release edits which are not updates
                if not re.get('prev_revision') and not re.get(
                        'redirect_ident'):
                    new_release_ids.append(re['ident'])
            file_edits = cle['editgroup']['edits']['files']
            for e in file_edits:
                file_ids.append(e['ident'])
            fileset_edits = cle['editgroup']['edits']['filesets']
            for e in fileset_edits:
                fileset_ids.append(e['ident'])
            webcapture_edits = cle['editgroup']['edits']['webcaptures']
            for e in webcapture_edits:
                webcapture_ids.append(e['ident'])
            container_edits = cle['editgroup']['edits']['containers']
            for e in container_edits:
                container_ids.append(e['ident'])
            work_edits = cle['editgroup']['edits']['works']
            for e in work_edits:
                work_ids.append(e['ident'])

            # TODO: do these fetches in parallel using a thread pool?
            for ident in set(file_ids):
                file_entity = self.api.get_file(ident, expand=None)
                # update release when a file changes
                # TODO: also fetch old version of file and update any *removed*
                # release idents (and same for filesets, webcapture updates)
                release_ids.extend(file_entity.release_ids or [])
                file_dict = self.api.api_client.sanitize_for_serialization(
                    file_entity)
                producer.produce(
                    self.file_topic,
                    json.dumps(file_dict).encode('utf-8'),
                    key=ident.encode('utf-8'),
                    on_delivery=fail_fast,
                )

            # TODO: topic for fileset updates
            for ident in set(fileset_ids):
                fileset_entity = self.api.get_fileset(ident, expand=None)
                # update release when a fileset changes
                release_ids.extend(fileset_entity.release_ids or [])

            # TODO: topic for webcapture updates
            for ident in set(webcapture_ids):
                webcapture_entity = self.api.get_webcapture(ident, expand=None)
                # update release when a webcapture changes
                release_ids.extend(webcapture_entity.release_ids or [])

            for ident in set(container_ids):
                container = self.api.get_container(ident)
                container_dict = self.api.api_client.sanitize_for_serialization(
                    container)
                producer.produce(
                    self.container_topic,
                    json.dumps(container_dict).encode('utf-8'),
                    key=ident.encode('utf-8'),
                    on_delivery=fail_fast,
                )

            for ident in set(release_ids):
                release = self.api.get_release(
                    ident, expand="files,filesets,webcaptures,container")
                if release.work_id:
                    work_ids.append(release.work_id)
                release_dict = self.api.api_client.sanitize_for_serialization(
                    release)
                producer.produce(
                    self.release_topic,
                    json.dumps(release_dict).encode('utf-8'),
                    key=ident.encode('utf-8'),
                    on_delivery=fail_fast,
                )
                # for ingest requests, filter to "new" active releases with no matched files
                if release.ident in new_release_ids:
                    ir = release_ingest_request(
                        release, ingest_request_source='fatcat-changelog')
                    if ir and not release.files and self.want_live_ingest(
                            release, ir):
                        producer.produce(
                            self.ingest_file_request_topic,
                            json.dumps(ir).encode('utf-8'),
                            #key=None,
                            on_delivery=fail_fast,
                        )

            # send work updates (just ident and changelog metadata) to scholar for re-indexing
            for ident in set(work_ids):
                assert ident
                key = f"work_{ident}"
                work_ident_dict = dict(
                    key=key,
                    type="fatcat_work",
                    work_ident=ident,
                    updated=cle['timestamp'],
                    fatcat_changelog_index=cle['index'],
                )
                producer.produce(
                    self.work_ident_topic,
                    json.dumps(work_ident_dict).encode('utf-8'),
                    key=key.encode('utf-8'),
                    on_delivery=fail_fast,
                )

            producer.flush()
            # TODO: publish updated 'work' entities to a topic
            consumer.store_offsets(message=msg)
Example no. 9
def lambda_handler(event, context):
    covid19_api_raw_data_url = os.getenv('COVID19_API_RAW_DATA_URL')
    covid19_api_state_data_url = os.getenv('COVID19_API_STATE_DATA_URL')
    covid19_api_test_data_url = os.getenv('COVID19_API_TEST_DATA_URL')
    bootstrap_servers = os.getenv('BOOTSTRAP_SERVERS')
    kafka_client_id = os.getenv('KAFKA_CLIENT_ID')
    kafka_patient_data_topic_name = os.getenv('KAFKA_PATIENT_DATA_TOPIC_NAME')
    kafka_state_data_topic_name = os.getenv('KAFKA_STATE_DATA_TOPIC_NAME')
    kafka_test_data_topic_name = os.getenv('KAFKA_TEST_DATA_TOPIC_NAME')
    telegram_bot_token = os.getenv('TELEGRAM_BOT_TOKEN')
    telegram_chat_id = os.getenv('TELEGRAM_CHAT_ID')

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'client.id': kafka_client_id,
        'linger.ms': '1000'
    }

    producer = Producer(conf, logger=logger)

    bot = Bot(token=telegram_bot_token)

    # import raw patient data from API
    resp = requests.get(url=covid19_api_raw_data_url)
    data = resp.json()

    # raw_data3.json starting point (27/04/2020)
    # patient number no longer represents a meaningful number. Just another id, treat it like that.
    patient_number = 27892
    for p in data['raw_data']:
        try:
            producer.produce(topic=kafka_patient_data_topic_name,
                             value=json.dumps(p),
                             key=str(patient_number),
                             on_delivery=acked)
            patient_number += 1
        except BufferError:
            logger.error(
                '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                % len(producer))
        producer.poll(0)
    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()
    raw_data_count = len(data['raw_data'])

    # import statewise testing data from API
    resp = requests.get(url=covid19_api_test_data_url)
    data = resp.json()
    test_data_count = 0
    old = None
    for p in data['states_tested_data']:
        try:
            if p['totaltested'] == '':
                continue
            key = dict({u'state': p['state'], u'date': p['updatedon']})
            if old is not None and p['state'] == old['state']:
                p['testreportedtoday'] = str(
                    int(p['totaltested']) - int(old['totaltested']))
                if p['positive'] and old[
                        'positive']:  # this data can be missing
                    p['positivereportedtoday'] = str(
                        int(p['positive']) - int(old['positive']))
                else:
                    p['positivereportedtoday'] = ""
            else:
                p['testreportedtoday'] = p['totaltested']
                if p['positive']:  # this data can be missing
                    p['positivereportedtoday'] = p['positive']
                else:
                    p['positivereportedtoday'] = ""
            test_data_count += 1
            old = p
            producer.produce(topic=kafka_test_data_topic_name,
                             value=json.dumps(p),
                             key=json.dumps(key),
                             on_delivery=acked)
        except BufferError:
            logger.error(
                '%% Local producer queue is full (%d messages awaiting delivery): try again\n'
                % len(producer))
        producer.poll(0)
    logger.info('%% Waiting for %d deliveries\n' % len(producer))
    producer.flush()

    bot.send_message(
        chat_id=telegram_chat_id,
        text='Imported {} patients and {} testing data into Kafka'.format(
            raw_data_count, test_data_count))
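
The handler relies on acked and logger, which are defined elsewhere in the module; a minimal sketch of what those module-level pieces typically look like (names follow the snippet, the bodies are assumptions):

# Hedged sketch of the helpers the handler assumes -- not from the original module.
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def acked(err, msg):
    # delivery report callback, invoked from producer.poll()/flush()
    if err is not None:
        logger.error('Failed to deliver message: %s', err)
    else:
        logger.info('Message delivered to %s [%d]', msg.topic(), msg.partition())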
Example no. 10
from confluent_kafka import Producer

p = Producer({'bootstrap.servers': '43.240.97.180:9092'})


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


count = 0

while True:
    # Trigger any available delivery report callbacks from previous produce() calls
    p.poll(0)
    count += 1
    # Asynchronously produce a message, the delivery report callback
    # will be triggered from poll() above, or flush() below, when the message has
    # been successfully delivered or failed permanently.
    try:
        p.produce('stream-sim', ("message " + str(count)).encode('utf-8'),
                  callback=delivery_report)
    except BufferError:
        continue
"""
p.poll(0)
Example no. 11
import time

from confluent_kafka import Producer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField

print("start 1p_multiples")

broker = 'kafka:9093'
topic = 'multiples'
conf = {'bootstrap.servers': broker}

p = Producer(**conf)
s = StringSerializer()
print("created KafkaPC")

ctx = SerializationContext(topic, MessageField.VALUE)
for i in range(10):

    # casts int to string for StringSerializer/StringDeserializer
    message = s(str(i*i), ctx)

    # DeprecationWarning will be resolved in upcoming release
    # https://github.com/confluentinc/confluent-kafka-python/issues/763
    p.produce(topic, message)

    print(f"Sent message {i*i}")
    time.sleep(1)
Example no. 12
from confluent_kafka import Producer
p = Producer({
    'bootstrap.servers':
    '10.245.146.221:9092,10.245.146.231:9092,10.245.146.232:9092'
})


def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


some_data_source = [
    "111111111", "222222222", "3333333", "444444444", "555555555", "66666666"
]
while True:
    for data in some_data_source:
        p.poll(0)
        p.produce('test', data.encode('utf-8'), callback=delivery_report)
        p.flush()
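
Flushing after every produce() forces a blocking round-trip per message; a hedged variant that keeps the same delivery callback but batches deliveries and flushes once per pass is sketched below (not part of the original):

# Hedged variant -- serve callbacks per message, flush once per pass.
while True:
    for data in some_data_source:
        p.poll(0)
        p.produce('test', data.encode('utf-8'), callback=delivery_report)
    p.flush()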
Example no. 13
    'broker.version.fallback': '0.10.0.0',
    'api.version.fallback.ms': 0,
    'sasl.mechanisms': 'PLAIN',
}

if config.KAFKA_KEY and config.KAFKA_SECRET:
    data = {
        'security.protocol': 'SASL_SSL',
        'sasl.username': config.KAFKA_KEY,
        'sasl.password': config.KAFKA_SECRET,
        **DATA
    }
else:
    data = DATA

p = Producer(data)

print("Publishing message...")
if len(sys.argv) < 2:
    print("missing number of values to send")
    sys.exit(1)


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))
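
The snippet stops after defining delivery_report; a minimal sketch of how the count taken from sys.argv would typically drive the produce loop (the topic name 'test' and the payload format are assumptions):

# Hedged continuation sketch -- topic name and payload are assumptions.
num_values = int(sys.argv[1])
for i in range(num_values):
    p.poll(0)
    p.produce('test', str(i).encode('utf-8'), callback=delivery_report)
p.flush()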
Example no. 14
from confluent_kafka import Consumer, Producer
from kafka import KafkaProducer
import json
import time

BROKER_URL = "PLAINTEXT://localhost:9092"
TOPIC_NAME = "mitopico"

p = Producer({"bootstrap.servers": BROKER_URL})
#p2 = KafkaProducer(bootstrap_servers='localhost:9092')
file = '/home/workspace/data/uber.json'
with open(file) as f:
    for line in f:
        message = json.dumps(line).encode('utf-8')
        p.produce(TOPIC_NAME, message)
        time.sleep(1)
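
Each line of uber.json is presumably already a JSON document, so json.dumps(line) wraps it in an extra layer of string quoting; a hedged alternative that validates each line and forwards compact JSON is sketched below (not the original behaviour):

# Hedged alternative -- parse to validate, then send the re-serialized JSON.
with open(file) as f:
    for line in f:
        obj = json.loads(line)  # fails fast on malformed lines
        p.produce(TOPIC_NAME, json.dumps(obj).encode('utf-8'))
        p.poll(0)
        time.sleep(1)
p.flush()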
Example no. 15
import io
import logging
import os
import time

from PIL import Image
from confluent_kafka import Consumer, Producer

producer = Producer({
    'bootstrap.servers': os.environ.get("KAFKA", "localhost:9092"),
    "message.send.max.retries": 2,
})


def run_consumer(queue, msg_handler):
    consumer = Consumer({
        'bootstrap.servers': os.environ.get("KAFKA", "localhost:9092"),
        'group.id': 'manager',
        'auto.offset.reset': 'earliest'  # earliest _committed_ offset
    })

    _wait_for_topic_to_exist(consumer, queue)

    logging.info("Subscribing to topic: %s", queue)
    consumer.subscribe([queue])

    while True:
        logging.debug("Waiting for messages in %r...", queue)
        msg = consumer.poll()

        if msg is None:
            logging.warning("Poll timed out")
            break
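
_wait_for_topic_to_exist is referenced but not shown; a minimal sketch of such a helper built on the client's list_topics() metadata call (the implementation is an assumption):

# Hedged sketch of the helper assumed above -- not the original implementation.
def _wait_for_topic_to_exist(client, topic, poll_interval=1.0):
    while True:
        metadata = client.list_topics(topic, timeout=5.0)
        topic_metadata = metadata.topics.get(topic)
        if topic_metadata is not None and topic_metadata.error is None:
            return
        logging.info("Topic %r not available yet, retrying...", topic)
        time.sleep(poll_interval)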
Example no. 16
    def __init__(self, brokers: str, row_count: int,
                 disable_progress_bar: bool):
        from confluent_kafka import Producer

        self.producer = Producer({"bootstrap.servers": brokers})
        super().__init__(brokers, row_count, disable_progress_bar)
Example no. 17
async def produce(topic_name):
    """Produces data into the Kafka Topic"""
    p = Producer({"bootstrap.servers": BROKER_URL})
    while True:
        p.produce(topic_name, ClickEvent().serialize())
        await asyncio.sleep(1.0)
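
A hedged sketch of how this coroutine is typically driven, assuming BROKER_URL and ClickEvent are defined elsewhere in the module (the topic name is an assumption):

# Hedged usage sketch -- topic name is an assumption.
import asyncio

if __name__ == "__main__":
    try:
        asyncio.run(produce("com.example.clickevents"))
    except KeyboardInterrupt:
        pass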
Example no. 18
 def __init__(self, broker, topic):
     self.broker = broker
     self.topic = topic
     self.bootstrap_servers = {'bootstrap.servers': self.broker}
     self.producer = Producer(self.bootstrap_servers)
Example no. 19
    def __init__(self, topic, rate, conf):
        """ SoakClient constructor. conf is the client configuration """
        self.topic = topic
        self.rate = rate
        self.disprate = int(rate * 10)
        self.run = True
        self.stats_cnt = {'producer': 0, 'consumer': 0}
        self.start_time = time.time()

        self.last_rusage = None
        self.last_rusage_time = None
        self.proc = psutil.Process(os.getpid())

        self.logger = logging.getLogger('soakclient')
        self.logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
        self.logger.addHandler(handler)

        # Construct a unique id to use for metrics hostname so that
        # multiple instances of the SoakClient can run on the same machine.
        hostname = datadog.util.hostname.get_hostname()
        self.hostname = "py-{}-{}".format(hostname, self.topic)

        self.logger.info("SoakClient id {}".format(self.hostname))

        if 'group.id' not in conf:
            # Generate a group.id bound to this client and python version
            conf['group.id'] = 'soakclient-{}-{}-{}'.format(
                self.hostname, version()[0], sys.version.split(' ')[0])

        # Separate datadog config from client config
        datadog_conf = {k[len("datadog."):]: conf[k]
                        for k in conf.keys() if k.startswith("datadog.")}
        conf = {k: v for k, v in conf.items() if not k.startswith("datadog.")}

        # Set up datadog agent
        self.init_datadog(datadog_conf)

        def filter_config(conf, filter_out, strip_prefix):
            len_sp = len(strip_prefix)
            out = {}
            for k, v in conf.items():
                if len([x for x in filter_out if k.startswith(x)]) > 0:
                    continue
                if k.startswith(strip_prefix):
                    k = k[len_sp:]
                out[k] = v
            return out

        # Create topic (might already exist)
        aconf = filter_config(conf, ["consumer.", "producer."], "admin.")
        self.create_topic(self.topic, aconf)

        #
        # Create Producer and Consumer, each running in its own thread.
        #
        conf['stats_cb'] = self.stats_cb
        conf['statistics.interval.ms'] = 10000

        # Producer
        pconf = filter_config(conf, ["consumer.", "admin."], "producer.")
        pconf['error_cb'] = self.producer_error_cb
        self.producer = Producer(pconf)

        # Consumer
        cconf = filter_config(conf, ["producer.", "admin."], "consumer.")
        cconf['error_cb'] = self.consumer_error_cb
        cconf['on_commit'] = self.consumer_commit_cb
        self.logger.info("consumer: using group.id {}".format(cconf['group.id']))
        self.consumer = Consumer(cconf)

        # Create and start producer thread
        self.producer_thread = threading.Thread(target=self.producer_thread_main)
        self.producer_thread.start()

        # Create and start consumer thread
        self.consumer_thread = threading.Thread(target=self.consumer_thread_main)
        self.consumer_thread.start()
Example no. 20
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': '******',
    'sasl.password': '******',
    'group.id': str(uuid.uuid1()),  # this will create a new consumer group on each invocation.
    'auto.offset.reset': 'earliest'
})

producer = Producer({
    'bootstrap.servers': 'pkc-ep9mm.us-east-2.aws.confluent.cloud:9092',
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': '******',
    'sasl.password': '******'
})

#
# This function is run in its own thread to do all processing associated
# with preparing an upload of a new file, and does everything except upload
# the chunks and metadata themselves, which is handled by a queue.
#


def upload_file(file):
    global kcs, chunk_size, upload_queue
Example no. 21
import uuid
import json

from confluent_kafka import Producer

p = Producer({'bootstrap.servers': 'localhost:9092', 'group.id': 'mygroup'})  # note: 'group.id' is a consumer property and is ignored by a producer
TOPIC = "job"


class ProducerError(Exception):
    pass


def produce(topic, key, value):
    p.produce(topic, key=key, value=json.dumps(value).encode())
    p.poll(0.5)


def main():
    message = {"method": "ingest_covid_data", "params": {}}
    produce(TOPIC, key=str(uuid.uuid4()).encode(), value=message)


main()
Example no. 22
 def Producer(self, options):
     self.logThis("Initialising a producer on server " +
                  str(options.get("bootstrap.servers")))
     return Producer(options)
Example no. 23
from argparse import ArgumentParser, FileType
from configparser import ConfigParser

from confluent_kafka import Producer

if __name__ == '__main__':
    # Parse the command line.
    parser = ArgumentParser()
    parser.add_argument('config_file', type=FileType('r'))
    args = parser.parse_args()

    # Parse the configuration.
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    config_parser = ConfigParser()
    config_parser.read_file(args.config_file) # It's getting_started.ini
    config = dict(config_parser['default'])

    # Create Producer instance
    producer = Producer(config)

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    def delivery_callback(err, msg):
        if err:
            print('ERROR: Message failed delivery: {}'.format(err))
        else:
            print("Produced event to topic {topic}: key = {key:12} value = {value:12}".format(
                topic=msg.topic(), key=msg.key().decode('utf-8'), value=msg.value().decode('utf-8')))

    # Produce data by selecting random values from these lists.
    topic = "purchases"
    user_ids = ['eabara', 'jsmith', 'sgarcia', 'jbernard', 'htanaka', 'awalther']
    products = ['book', 'alarm clock', 't-shirts', 'gift card', 'batteries']
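
The snippet ends before anything is produced; a minimal sketch of how these lists typically feed the producer (the message count and the use of random.choice are assumptions, not the original code):

    # Hedged continuation sketch -- loop body is an assumption.
    from random import choice

    for _ in range(10):
        user_id = choice(user_ids)
        product = choice(products)
        producer.produce(topic, product, user_id, callback=delivery_callback)
        producer.poll(0)

    producer.flush()  # block until outstanding messages are delivered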
Example no. 24
import json
import random
from confluent_kafka import Producer


p = Producer({'bootstrap.servers': '34.238.53.42:9092'})

def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


path_data_pokemon = "data/pokedex.json"
with open(path_data_pokemon, 'r') as data_s:
    data_j = json.load(data_s)

topic = "pokedex"

try:
    while True:
        index = random.randint(0, len(data_j) - 1)
        pokemon = data_j[index]
        print(pokemon)
        p.produce(topic, str(pokemon), callback=delivery_report)
        p.poll(2.0)

except KeyboardInterrupt:
    pass
Example no. 25
# Callback for receiving error messages emitted by the producer instance
def error_cb(err):
    print('Error: %s' % err)


# Main entry point
if __name__ == '__main__':
    # Step 1. Configure the connection to the Kafka cluster
    props = {
        # Where is the Kafka cluster?
        'bootstrap.servers': 'localhost:9092',    # <-- replace with the Kafka cluster to connect to
        'error_cb': error_cb                      # callback function for receiving error messages
    }
    # Step 2. Create a Kafka Producer instance
    producer = Producer(**props)
    # Step 3. Specify the topic name to publish messages to
    topicName = 'ak03.fourpartition'
    msgCount = 10000
    try:
        print('Start sending messages ...')
        # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers])
        for i in range(0, msgCount):
            producer.produce(topicName, key=str(i), value='msg_' + str(i))
            producer.poll(0)  # <-- (important) call poll so the client can service its internal buffer and callbacks
            print('key={}, value={}'.format(str(i), 'msg_' + str(i)))
            time.sleep(3)  # pause the main thread for 3 seconds

        print('Send ' + str(msgCount) + ' messages to Kafka')
    except BufferError as e:
        # error handling
Example no. 26
k_admin_client = kafka_admin.AdminClient({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS})

kafka_consumer = Consumer({
    'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
    'group.id': f"kafka-eof_{str(uuid.uuid4())}",
    'auto.offset.reset': 'earliest',
    'enable.auto.commit': False,
    'enable.auto.offset.store': False,
    'enable.partition.eof': False
})

kafka_consumer.subscribe([KAFKA_TOPIC_IN_0, KAFKA_TOPIC_IN_1])
# kafka_consumer.assign([TopicPartition(KAFKA_TOPIC_IN_0), TopicPartition(KAFKA_TOPIC_IN_1)])

# create a Kafka producer
kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
                           "transactional.id": 'eos-transactions1.py'})


@pytest.mark.tryfirst()
def delivery_report(err, msg):
    """ Delivery callback for Kafka Produce. Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        if VERBOSE:
            # get the sent message using msg.value()
            print(f"Message '{msg.key().decode('utf-8')}'  \tdelivered to topic '{msg.topic()}' [{msg.partition()}].")


@pytest.mark.tryfirst()
Example no. 27
from confluent_kafka import Producer
import socket

bootstrap_servers = "localhost:9092"
topic = "q-data"

conf = {
    "bootstrap.servers": bootstrap_servers,
    "client.id": socket.gethostname()
}

producer = Producer(conf)


def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: %s: %s" % (str(msg), str(err)))
    else:
        print("Message produced: %s" % (str(msg)))


producer.produce(topic, key="42", value="73", callback=acked)

# Wait up to 1 second for events. Callbacks will be invoked during
# this method call if the message is acknowledged.
producer.poll(1)
Example no. 28
from confluent_kafka import Producer
import time

p = Producer({
    'bootstrap.servers': 'localhost:9092',
    "queue.buffering.max.ms": 1
})

# queue.buffering.max.ms": 2


def delivery_report(
    err,
    msg,
):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message offset: {} delivered to {} [{}]'.format(
            msg.offset(), msg.topic(), msg.partition()))


m_count = 0
called = True

while m_count <= 10000:

    #p.poll(0.01)

    m_count += 1
    #if m_count%30 == 0:
Example no. 29
 '''
 Because the KPI file is big, we emulate by reading chunk, using iterator and chunksize
 '''
 INPUT_DATA_FILE = args.input_file
 chunksize = int(args.chunksize)
 sleeptime = int(args.sleeptime)
 KAFKA_TOPIC = args.topic
 '''
 the time record is "TIME"
 we read data by chunk so we can handle a big sample data file
 '''
 input_data = pd.read_csv(INPUT_DATA_FILE,
                          parse_dates=['TIME'],
                          iterator=True,
                          chunksize=chunksize)
 kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVER})
 for chunk_data in input_data:
     '''
     now process each chunk
     '''
     chunk = chunk_data.dropna()
     for index, row in chunk.iterrows():
         '''
         Assume that when some data is available, we send it to Kafka in JSON
         '''
         json_data = json.dumps(row.to_dict(), default=datetime_converter)
         #check if any event/error sent
         print(f'DEBUG: Send {json_data} to Kafka')
         kafka_producer.produce(KAFKA_TOPIC,
                                json_data.encode('utf-8'),
                                callback=kafka_delivery_error)
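
kafka_delivery_error and datetime_converter are referenced but not shown; a hedged sketch of what those helpers typically look like follows (names follow the snippet, the bodies are assumptions). After the loop the producer would usually be drained with kafka_producer.poll(0) per message and a final kafka_producer.flush().

# Hedged sketch of the helpers assumed above -- not the original implementations.
import datetime


def datetime_converter(obj):
    # make TIME/datetime values serializable for json.dumps(..., default=...)
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    raise TypeError("Type %s is not JSON serializable" % type(obj))


def kafka_delivery_error(err, msg):
    # delivery report callback: report failures only
    if err is not None:
        print('Message delivery failed: {}'.format(err))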
Example no. 30
"""
Partially from https://github.com/confluentinc/confluent-kafka-python, code was/is under licence Apache 2.0.
"""
from confluent_kafka import Producer
from read_from_BigQuery import get_data_from_BigQuery_continuous_stateful

# TODO move to env files
p = Producer({
    'bootstrap.servers': (
        'ec2-52-11-165-61.us-west-2.compute.amazonaws.com,'
        'ec2-52-10-3-49.us-west-2.compute.amazonaws.com,'
        'ec2-34-218-39-83.us-west-2.compute.amazonaws.com'
    )
})


def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to topic {} partition [{}]'.format(
            msg.topic(), msg.partition()))


for data in get_data_from_BigQuery_continuous_stateful(100):
    # Trigger any available delivery report callbacks from previous produce() calls
    p.poll(0)

    # Asynchronously produce a message, the delivery report callback
    # will be triggered from poll() above, or flush() below, when the message has