Example #1
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer


def produce():
    value_schema = avro.loads(value_schema_str)
    key_schema = avro.loads(key_schema_str)

    def delivery_report(err, msg):
        """ Called once for each message produced to indicate delivery result.
            Triggered by poll() or flush(). """
        if err is not None:
            print('Message delivery failed: {}'.format(err))
        else:
            print('Message delivered to {} [{}]'.format(
                msg.topic(), msg.partition()))

    avro_producer = AvroProducer(
        {
            'bootstrap.servers': config.BOOTSTRAP_SERVERS,
            'on_delivery': delivery_report,
            'schema.registry.url': config.SCHEMA_REGISTRY_URL
        },
        default_key_schema=key_schema,
        default_value_schema=value_schema)

    # Seed the topic with the full dataset only if it does not exist yet
    cluster_metadata = avro_producer.list_topics()
    if TOPIC_NAME not in cluster_metadata.topics:
        for name in most_common_names_usa:
            value = {"rank": name[0], "name": name[1], "data": name[2]}
            key = {"rank": name[0]}
            avro_producer.produce(topic=TOPIC_NAME, value=value, key=key)
        avro_producer.flush()
    else:
        print(f"{TOPIC_NAME} exists, do nothing")
Example #2
import logging
import time

from confluent_kafka.admin import AdminClient, NewTopic
from confluent_kafka.avro import AvroProducer

logger = logging.getLogger(__name__)


class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Broker properties; the Kafka host URL comes from the project README
        bootstrap_locations = 'PLAINTEXT://localhost:9092'
        self.broker_properties = {'bootstrap.servers': bootstrap_locations}

        # Configure the AvroProducer with the broker and Schema Registry URLs
        producer_conf = {
            'bootstrap.servers': bootstrap_locations,
            'schema.registry.url': 'http://localhost:8081',
            'client.id': "genericProducer"
        }
        self.producer = AvroProducer(
            producer_conf,
            default_key_schema=key_schema,
            default_value_schema=value_schema,
        )
        list_topics_in_kafka = self.producer.list_topics().topics.keys()

        # If the topic already exists in Kafka, just record it; otherwise
        # try to create it once per process
        if self.topic_name in list_topics_in_kafka:
            Producer.existing_topics.add(self.topic_name)
        elif self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)
        logger.info("producer attached")

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""

        admin = AdminClient(self.broker_properties)
        topics = [self.topic_name]
        new_topics = [
            NewTopic(topic,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas) for topic in topics
        ]
        fs = admin.create_topics(new_topics)

        for topic, f in fs.items():
            try:
                f.result()  # The result itself is None
                logger.info("created new topic")
            except Exception as e:
                logger.info(
                    f"topic creation {self.topic_name} kafka integration incomplete  - skipping "
                )
                logger.info(e)

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        try:
            self.producer.flush()
        except Exception as e:
            logger.warning(f"producer close incomplete - skipping ({e})")

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
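A hedged usage sketch for this class; the schema definitions and topic name below are placeholders, not values from the original project:

# Hypothetical driver code for the Producer above.
from confluent_kafka import avro

key_schema = avro.loads('{"name": "Key", "type": "record",'
                        ' "fields": [{"name": "id", "type": "int"}]}')
value_schema = avro.loads('{"name": "Value", "type": "record",'
                          ' "fields": [{"name": "msg", "type": "string"}]}')

producer = Producer("example.topic", key_schema, value_schema)
producer.producer.produce(topic=producer.topic_name,
                          key={"id": 1}, value={"msg": "hello"})
producer.close()  # flushes buffered messages before exit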
Example #3
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # group.id is a consumer-only property, so it is omitted here;
        # AvroProducer accepts schema.registry.url alongside broker settings
        self.broker_properties = {
            'schema.registry.url': SCHEMA_REGISTRY_URL,
            'bootstrap.servers': BROKER_URL,
        }

        self.producer = AvroProducer(config=self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        # Only create the topic if the broker does not already know about it
        if self.producer.list_topics(timeout=5).topics.get(
                self.topic_name) is None:
            new_topic = NewTopic(topic=self.topic_name,
                                 num_partitions=self.num_partitions,
                                 replication_factor=self.num_replicas,
                                 config={
                                     "cleanup.policy": "compact",
                                     "compression.type": "lz4"
                                 })
            client = AdminClient({"bootstrap.servers": BROKER_URL})
            result = client.create_topics([new_topic])

            for topic, future in result.items():
                try:
                    future.result()
                    logger.info(
                        f"successfully created topic {self.topic_name}")
                except Exception as e:
                    logger.error(
                        f"failed to create topic {self.topic_name}: {e}")
                    raise
        else:
            logger.info(
                f"topic {self.topic_name} already exists, skipping creation")

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.producer.flush()
        logger.info("producer closed")

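Example #4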
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Broker properties; host URLs for Kafka and the Schema Registry are
        # taken from the project README
        self.broker_properties = {
            "SCHEMA_REGISTRY_URL": "http://localhost:8085",
            "BROKER_URL": "PLAINTEXT://localhost:9092"
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer against an explicit Schema Registry client
        self.schema_registry = CachedSchemaRegistryClient(
            {"url": self.broker_properties["SCHEMA_REGISTRY_URL"]})
        self.producer = AvroProducer(
            {"bootstrap.servers": self.broker_properties["BROKER_URL"]},
            schema_registry=self.schema_registry,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        client = AdminClient(
            {"bootstrap.servers": self.broker_properties["BROKER_URL"]})
        futures = client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas)
        ])
        for topic, future in futures.items():
            try:
                future.result()
                logger.info("Created topic: " + topic)
            except Exception as e:
                logger.error(f"Failed to create topic {topic}: {e}")

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            logger.debug("flushing producer...")
            self.producer.flush()
            # delete_topics() expects topic name strings, not TopicMetadata
            # objects; note this tears down every topic the cluster reports
            client = AdminClient(
                {"bootstrap.servers": self.broker_properties["BROKER_URL"]})
            topic_dict = self.producer.list_topics().topics
            client.delete_topics(list(topic_dict.keys()))
            logger.info("Closing producer: " + self.topic_name)

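A hedged usage sketch for this variant; the topic name and payload are placeholders. Note that close() here deletes every topic the cluster reports, so it only makes sense against a throwaway local broker:

# Hypothetical usage; assumes key_schema and value_schema are parsed Avro schemas.
p = Producer("example.purchases", key_schema, value_schema=value_schema)
p.producer.produce(topic=p.topic_name,
                   key={"timestamp": p.time_millis()},
                   value={"amount": 42})
p.close()  # flushes, then removes topics via the AdminClient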
Example #5
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # Broker properties; host URLs for local Kafka and Schema Registry
        self.broker_properties = {
            "bootstrap.servers": 'PLAINTEXT://localhost:9092',
            "schema.registry.url": 'http://localhost:8081'
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # TODO: Configure the AvroProducer
        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=key_schema, 
            default_value_schema=value_schema
        )

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        # This runs from __init__ before self.producer exists, so it must use
        # its own AdminClient. The AdminClient only understands librdkafka
        # properties, so schema.registry.url must not be passed to it.
        client = AdminClient(
            {"bootstrap.servers": self.broker_properties["bootstrap.servers"]})
        if self.topic_name not in client.list_topics().topics:
            futures = client.create_topics([NewTopic(topic=self.topic_name,
                                                     num_partitions=self.num_partitions,
                                                     replication_factor=self.num_replicas)])

            for topic, future in futures.items():
                try:
                    future.result()
                    logger.info(f"Topic {topic} created")
                except Exception as e:
                    logger.fatal(f"Failed to create topic {topic}: {e}")
        else:
            logger.info(f"Topic {self.topic_name} already exists")

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        if self.producer is not None:
            logger.debug("flushing producer...")
            self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
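Since the class is meant to provide "common functionality amongst Producers", the intended pattern is to subclass it. A hedged sketch under that assumption; the topic name and record fields are invented for illustration:

# Hypothetical subclass; key_schema/value_schema would be parsed Avro schemas.
class WeatherProducer(Producer):
    def __init__(self):
        super().__init__(
            "example.weather",       # assumed topic name
            key_schema=key_schema,
            value_schema=value_schema,
            num_partitions=1,
            num_replicas=1,
        )

    def run(self):
        # time_millis() supplies the millisecond timestamp used as the event key
        self.producer.produce(topic=self.topic_name,
                              key={"timestamp": self.time_millis()},
                              value={"temperature": 21.5})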
Example #6
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        self.broker_properties = {
            'schema.registry.url': SCHEMA_REGISTRY_URL,
            'bootstrap.servers': BROKER_URL,
            'client.id': "ex4",
            'linger.ms': 1000,
            'compression.type': 'lz4',
            'batch.num.messages': 100,
        }

        self.producer = AvroProducer(
            self.broker_properties,
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema,
        )

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""

        # If the topic already exists, skip creation entirely
        if self.topic_name in self.producer.list_topics().topics:
            logger.info(
                f'Topic {self.topic_name} already exists, skipping topic creation'
            )
            return

        client = AdminClient({'bootstrap.servers': BROKER_URL})
        futures = client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas,
                     config={
                         "cleanup.policy": "delete",
                         "compression.type": "lz4",
                         "delete.retention.ms": 2000,
                         "file.delete.delay.ms": 2000
                     })
        ])

        for _, future in futures.items():
            try:
                future.result()
                logger.info(f'Topic {self.topic_name} successfully created')
            except Exception as e:
                logger.error(f'Failed to create topic {self.topic_name}: {e}')
                raise

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""

        # AvroProducer exposes no close(); flushing pending messages is the
        # supported cleanup
        self.producer.flush()

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
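Unlike the earlier variants, this __init__ never calls create_topic(), so topic creation is an explicit extra step. The batching settings also change delivery behaviour: with 'linger.ms': 1000 and 'batch.num.messages': 100, records buffer client-side for up to a second before an lz4-compressed batch goes out, so the final flush() is what guarantees the tail of the stream is delivered. A hedged driver sketch with placeholder names:

# Hypothetical usage of the batching Producer above.
p = Producer("example.metrics", key_schema, value_schema)
p.create_topic()  # creation is explicit in this variant
for i in range(250):
    p.producer.produce(topic=p.topic_name,
                       key={"timestamp": p.time_millis()},
                       value={"reading": i})
p.close()  # flush() pushes out any partially filled batches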