# Generate SITE_COUNT random pages (Markov-chain text) and assign each one
# to a random parent site. The page count comes from the first CLI argument.
SITE_COUNT = int(sys.argv[1])
MAX_LINKS_PER_PAGE = 1

parent_sites = [
    'site1.adsa-project.local',
    'site2.adsa-project.local',
    'site3.adsa-project.local',
]

# %%
g = DocumentGenerator(text_generator=MarkovTextGenerator(
    model='markov_textgen.json'))

# Generate page objects: a short title, a body of 20-30 sentences, and
# MAX_LINKS_PER_PAGE random outgoing links (indices into the page list).
pages = []
for _ in range(SITE_COUNT):
    page = Page(
        title=cleanup(g.gen_sentence(3, 6)).title(),
        body=cleanup('. '.join(
            g.gen_sentence(5, 15)
            for _ in range(random.randint(20, 30))
        )),
        # randrange(SITE_COUNT) == randint(0, SITE_COUNT - 1), but without
        # the error-prone "- 1".
        links=[
            random.randrange(SITE_COUNT)
            for _ in range(MAX_LINKS_PER_PAGE)
        ],
    )
    pages.append(page)

# Map pages to parent sites. random.choice derives the bound from the list
# itself (the previous randint(0, 2) silently broke if sites were added).
for page in pages:
    page.set_parent_site(random.choice(parent_sites))
broker_topics = metadata.topics()

# Deleting the topic if it already exists.
# This is NOT recommended in production. Deletion must propagate through the
# cluster before the topic can be recreated — hence the forced metadata
# refresh below (NOTE(review): this refresh does not guarantee the deletion
# has completed; confirm against broker settings).
admin_client = KafkaAdminClient(bootstrap_servers=kafka_servers)
if topic_name in broker_topics:
    admin_client.delete_topics([topic_name])
    try:
        # Force a cluster-metadata update so the client notices the deletion.
        future = client.cluster.request_update()
        client.poll(future=future)
    except KafkaError as e:
        # Best-effort: log and continue; topic creation below may still work.
        print(e)

admin_client.create_topics(new_topics=topic_list, validate_only=False)

# Setting up producer.
# SECURITY NOTE: pickle is unsafe to *deserialize* from untrusted sources;
# any consumer of this topic must trust this producer.
print("Connecting producer to cluster...", flush=True)
producer = KafkaProducer(
    bootstrap_servers=kafka_servers,
    max_block_ms=10000,  # connection timeout
    value_serializer=lambda x: pickle.dumps(x))

# Sending a random sentence every 3 seconds.
gen = DocumentGenerator()
while True:
    payload = {"datetime": datetime.now(), "sentence": gen.gen_sentence()}
    print("Sending : ", payload, flush=True)
    producer.send(topic_name, payload)
    print("Sent.\n", flush=True)
    time.sleep(3)