def create_topic(self, topic_name: str, num_partitions: int = 1, replication_factor: int = 1):
    """
    Create a new topic unless one with the same name is already present.

    The admin client is built from ``self.settings`` and is always closed
    before returning, including when listing or creating topics raises.

    :param topic_name: New topic name
    :param num_partitions: Number of partitions. Default value is 1.
    :param replication_factor: Replication factor. Default value is 1.
    """
    logger.debug(f"Creating a topic called {topic_name}")
    admin = KafkaAdminClient(**self.settings)
    try:
        if topic_name in admin.list_topics():
            # Nothing to do; the finally clause still releases the client.
            return
        admin.create_topics([
            NewTopic(topic_name, num_partitions=num_partitions, replication_factor=replication_factor)
        ])
    finally:
        admin.close()
    logger.debug(f"Topic {topic_name} created")
def clean_topics():
    """
    Delete every topic whose name contains ``owid-covid``.

    Each matching topic name is printed before the deletion request is sent.
    The admin client is closed when the function finishes, even on error.
    """
    kafka_admin_client = KafkaAdminClient(
        bootstrap_servers=['192.168.56.101:29094'])
    try:
        my_topics = []
        for topic in kafka_admin_client.list_topics():
            if "owid-covid" in topic:
                print(topic)
                my_topics.append(topic)
        # Skip the round trip to the broker when nothing matched.
        if my_topics:
            kafka_admin_client.delete_topics(my_topics)
    finally:
        kafka_admin_client.close()
def get_movies(TOPIC_NAME, utils_predics):
    """
    Scrape the Rotten Tomatoes "best of 2020" list and report how many movies were processed.

    Every (title, score) pair found on the page is handed to ``process_movie``
    on a pool of 10 worker threads; results equal to ``'FAILED'`` are dropped.

    :param TOPIC_NAME: Topic name forwarded to ``process_movie``.
    :param utils_predics: Opaque helper object forwarded to ``process_movie``.
    :return: ``"<count> Movies scrapped"`` on success, or the error message
        string when the listing page could not be fetched.
    """
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    try:
        admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092", client_id='test')
        try:
            # The result is not used; the call is kept so an unreachable
            # broker still fails here, as it did before.
            admin_client.list_topics()
        finally:
            admin_client.close()

        async def Scrapper(topic_=TOPIC_NAME, utils_predics=utils_predics):
            """Return a list of movie dicts, or an error string if the page is unreachable."""
            url_top = "https://www.rottentomatoes.com/top/bestofrt/?year=2020"
            url_home = "https://www.rottentomatoes.com"
            audiance_link = "/reviews?type=user"
            try:
                uClient = uReq(url_top).read()
            except URLError:
                return 'FATAL ERROR ACCESSING SERVER.......... RETRY..... '
            top_movies = soup(uClient, "html.parser").find("table", {"class": "table"})
            titles_tag = top_movies.findAll("a", {"class": "unstyled articleLink"})
            ranks = top_movies.findAll("span", {"class": "tMeterScore"})
            Movies = []
            with ThreadPoolExecutor(max_workers=10) as executor:
                loop = asyncio.get_event_loop()
                tasks = [
                    loop.run_in_executor(
                        executor,
                        process_movie,
                        *(topic_, title_tag, rank, url_home, audiance_link, utils_predics))
                    for title_tag, rank in tqdm(zip(titles_tag, ranks),
                                                total=max([len(titles_tag), len(ranks)]))
                ]
                for response in await asyncio.gather(*tasks):
                    if response != 'FAILED':
                        Movies.append(response.__dict__)
            return Movies

        start_time = time.time()
        loop = asyncio.get_event_loop()
        future = asyncio.ensure_future(Scrapper())
        msg = loop.run_until_complete(future)
        # NOTE: publishing the scraped payload to TOPIC_NAME through
        # `producer` is currently disabled.
        print(f"====Execution Time : {(time.time() - start_time)} seconds====")
    finally:
        producer.close()

    if isinstance(msg, str):
        # Scrapper signals a fetch failure with a message string; return it
        # as-is instead of reporting its character count as a movie count.
        return msg
    return str(len(msg)) + " Movies scrapped"
def wait_for_kafka(port: int, *, hostname: str = "127.0.0.1", wait_time: float = 20.0) -> None:
    """
    Block until a Kafka broker at ``hostname:port`` answers a topic listing.

    A connection attempt is made roughly every 2 seconds. Each probing client
    is closed after its attempt so retries do not accumulate open connections.

    :param port: Broker port.
    :param hostname: Broker host. Default is ``127.0.0.1``.
    :param wait_time: Seconds after which no further attempt is started.
    :raises Timeout: If ``wait_time`` has elapsed before an attempt succeeds.
    """
    start_time = time.monotonic()
    bootstrap_server = f"{hostname}:{port}"
    while True:
        if time.monotonic() - start_time > wait_time:
            raise Timeout(f"Could not contact kafka cluster on host {hostname}, port {port}")
        try:
            client = KafkaAdminClient(bootstrap_servers=bootstrap_server)
            try:
                client.list_topics()
            finally:
                client.close()
            return
        # pylint: disable=broad-except
        except Exception as e:
            print(f"Error checking kafka cluster: {e}")
            time.sleep(2.0)
def test_runner(self):
    """
    Generalized producer consumer test.

    Assuming there is an empty test kafka rollout locally at port 9092.
    The test removes its topic if it is left over from an earlier run, starts
    a producer and the stats collector runner, lets them run for 10 seconds,
    stops both and prints the nodes recorded for this run id.
    """
    test_run_id = str(uuid.uuid1())
    test_config = StatsProcessorConfig()
    test_config.servers = ['localhost:9092']
    test_config.topics = ['test-topic']
    test_config.client_id = 'test-client'

    # Remove existing topics
    admin_client = KafkaAdminClient(bootstrap_servers=test_config.servers, client_id='test-admin')
    try:
        existing_topics = set(test_config.topics).intersection(admin_client.list_topics())
        if existing_topics:
            # delete_topics is documented to take a list of topic names.
            admin_client.delete_topics(list(existing_topics))
    finally:
        admin_client.close()

    # Initialize the database
    local_db_mngr = LocalDataManager(self.test_dir)
    local_db_mngr.initialize_db()

    runner = StatsCollectorRunner(self.test_dir, test_config)
    runner.initialize_runner()

    test_producer = TestEventsProducer(kafka_servers=test_config.servers,
                                       test_topic=test_config.topics[0],
                                       test_run_id=test_run_id)
    test_producer.start()
    runner.start()
    time.sleep(10)  # run for 10 seconds

    runner.stop()
    print('Runner stopping')
    runner.join()
    print('Runner exited')

    test_producer.stop()
    print('Producer stopping')
    test_producer.join()
    print('Producer exited')

    with local_db_mngr as bench:
        print(bench.get_run_nodes(test_run_id))
def process_notification(self):
    """
    Recreate the read/write example topics, then generate the read example.

    Topic names and the broker address come from ``self._yaml_config``
    (keys ``bootstrap_servers``, ``read_example_topic``, ``write_example_topic``).
    A topic that already exists is deleted through the ``kafka-topics.sh``
    command line tool and its exit status is printed. Both topics are then
    created with one partition and a replication factor of 1. The admin
    client is closed before ``generate_read_example`` is called.
    """
    bootstrap_servers = self._yaml_config.get('bootstrap_servers')
    read_example_topic = self._yaml_config.get('read_example_topic')
    write_example_topic = self._yaml_config.get('write_example_topic')
    # Read topic first, then write topic, for both deletion and creation.
    example_topics = (read_example_topic, write_example_topic)

    admin_client = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
    try:
        topics = admin_client.list_topics()
        for topic_name in example_topics:
            if topic_name in topics:
                process = Popen(args=[
                    'kafka-topics.sh',
                    '--bootstrap-server', bootstrap_servers,
                    '--delete',
                    '--topic', topic_name,
                ], shell=False)
                print('Delete kafka topic {} status: {}'.format(
                    topic_name, process.wait()))

        # Create the inference read example topic, then the write example topic.
        for topic_name in example_topics:
            admin_client.create_topics(new_topics=[
                NewTopic(name=topic_name, num_partitions=1, replication_factor=1)
            ])
    finally:
        admin_client.close()

    self.generate_read_example()
def initialize_kafka_topics(self):
    """
    Make sure the server connection is possible and the tracked topics exist.

    The request topic and the response topic are each created, with one
    partition and a replication factor of 1, when they are missing from the
    broker's topic list. The admin client is closed before returning, also
    when listing or creating topics raises.
    """
    admin_client = KafkaAdminClient(bootstrap_servers=self._servers,
                                    client_id=self._client_id)
    try:
        existing_topics = admin_client.list_topics()
        # Request topic first, then response topic.
        for topic_name in (self._request_topic, self._response_topic):
            if topic_name not in existing_topics:
                new_topic = NewTopic(name=topic_name,
                                     num_partitions=1,
                                     replication_factor=1)
                admin_client.create_topics(new_topics=[new_topic],
                                           validate_only=False)
    finally:
        admin_client.close()
import time if __name__ == '__main__': admin = KafkaAdminClient() used_topics = ( "topic_oferte", "topic_rezultat", "topic_oferte_procesate", "topic_notificare_procesor_mesaje", ) # se sterg topic-urile, daca exista deja print("Se sterg topic-urile existente...") kafka_topics = admin.list_topics() for topic in kafka_topics: if topic in used_topics: print("\tSe sterge {}...".format(topic)) admin.delete_topics(topics=[topic], timeout_ms=2000) # se asteapta putin ca stergerea sa aiba loc time.sleep(2) # se creeaza topic-urile necesare aplicatiei print("Se creeaza topic-urile necesare:") lista_topicuri = [ NewTopic(name=used_topics[0], num_partitions=4, replication_factor=1), NewTopic(name=used_topics[1], num_partitions=1, replication_factor=1), NewTopic(name=used_topics[2], num_partitions=1, replication_factor=1), NewTopic(name=used_topics[3], num_partitions=1, replication_factor=1)
def latest_topic(self):
    """
    Print and return the topic name that comes last in sorted order.

    Connects to the brokers in ``self.kafka_brokers``, lists all topics and
    closes the admin client before returning.
    """
    client = KafkaAdminClient(bootstrap_servers=self.kafka_brokers)
    ordered_names = sorted(client.list_topics())
    last_name = ordered_names[-1]
    print(last_name)
    client.close()
    return last_name
""" Description: Test for Kafka features: 1. Kafka Topic and Topic key 2. Kafka Partitions """ from kafka import KafkaConsumer, TopicPartition, KafkaAdminClient __consumer = KafkaConsumer(bootstrap_servers="localhost:9092") con_topics = __consumer.topics() part_of_topic = __consumer.partitions_for_topic(topic="logging_test") print(f"All topics: {con_topics}") print(f"All topics partition: {part_of_topic}") # __test_topic = TopicPartition(topic="test", partition=1) __admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") list_topics = __admin_client.list_topics() list_consumer_groups = __admin_client.list_consumer_groups() print(f"list_topics: {list_topics}") print(f"list_consumer_groups: {list_consumer_groups}")