Example #1
def test_qps():
    # Create the producer
    p = Producer(ip=g_ip, user=g_user, password=g_password)
    p.producer_declare()
    p.create_exchange(g_exchange, "topic")

    # Create the consumers
    consumers = []

    for queue_name in g_queue_name:
        for i in range(3):
            c = Consumer(ip=g_ip, user=g_user, password=g_password)
            c.start_consumer(g_exchange, queue_name + str(i), queue_name + str(i))
            consumers.append(c)

    time.sleep(10)  # wait 10 seconds for the consumers to finish binding
    log.info("[test_qps] starting ...")

    try:
        target_time = g_test_secs
        start = time.time()
        stop = False
        while not stop:
            for queue_name in g_queue_name:
                for i in range(0, 3):
                    time.sleep(g_sleep_secs)
                    p.publish(g_exchange, queue_name + str(i),
                              '{"msg":"this is a test!"}')
                    curr = time.time()
                    if (curr - start) >= target_time:
                        stop = True
                        break
                if stop:
                    break

    except Exception as err:
        log.error("[test_qps] error: " + str(err))
    finally:
        for queue_name in g_queue_name:
            for i in range(0, 3):
                p.publish(g_exchange, queue_name + str(i), "quit")
        p.close()

        received = 0
        last_time = 0.0
        for c in consumers:
            c.join()
            received += c.number_of_msg()
            if c.stop_consume_time() > last_time:
                last_time = c.stop_consume_time()

        log.info("[test_qps] %d msg have been sent, start at %f" %
                 (p.number_of_msg(), p.start_publish_time()))
        log.info("[test_qps] %d msg have been received, end at %f" %
                 (received, last_time))
        log.info("[test_qps] QPS: %f" % (received /
                                         (last_time - p.start_publish_time())))
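
Examples #1 and #4 drive a project-specific Producer/Consumer wrapper that is not shown on this page. The sketch below is one plausible shape for the Producer side, assuming pika; the method names mirror the calls in the tests, but the bodies, connection defaults, and exchange settings are assumptions, not the original implementation.

import time

import pika


class Producer:
    """Assumed pika-based producer matching the interface used in the tests."""

    def __init__(self, ip, user, password):
        credentials = pika.PlainCredentials(user, password)
        self._params = pika.ConnectionParameters(host=ip, credentials=credentials)
        self._connection = None
        self._channel = None
        self._count = 0
        self._first_publish_time = 0.0

    def producer_declare(self):
        # Open the connection and channel used by all later calls.
        self._connection = pika.BlockingConnection(self._params)
        self._channel = self._connection.channel()

    def create_exchange(self, exchange, exchange_type):
        self._channel.exchange_declare(exchange=exchange,
                                       exchange_type=exchange_type)

    def publish(self, exchange, routing_key, body):
        # Remember when the first message went out so QPS can be computed later.
        if self._count == 0:
            self._first_publish_time = time.time()
        self._channel.basic_publish(exchange=exchange,
                                    routing_key=routing_key,
                                    body=body)
        self._count += 1

    def number_of_msg(self):
        return self._count

    def start_publish_time(self):
        return self._first_publish_time

    def close(self):
        self._connection.close()

The matching Consumer would open its own connection, bind a per-test queue to the exchange, and consume on a background thread, so that start_consumer, number_of_msg, stop_consume_time, and join behave as the tests expect.
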
Example #2
class Crawler:
  MAX_URL = 10

  def __init__(self):
    self.url_counter = 1
    self.document_client = DocumentClient()
    self.indexing_client = IndexingClient()
    self.pagerank_client = PagerankClient()
    self.producer = Producer('url_queue')
    self.consumer = Consumer('url_queue')

  def run(self):
    self.consumer.subscribe(self.run_for_url)

  def run_for_url(self, ch, method, properties, body):
    doc_url = body.decode("utf-8")
    print("[Crawler] Received %r" % doc_url)

    document_text = WebScraper.get_text(doc_url)
    document_links = WebScraper.get_links(doc_url)

    # Fingerprint the page content so changes can be detected on later crawls.
    hash_object = hashlib.sha256(document_text.encode("utf-8"))
    digest = hash_object.hexdigest()

    # Create a document record the first time this URL is seen.
    doc_record = self.document_client.get_by_url(doc_url)
    if "id" not in doc_record:
      doc_record = self.document_client.create(doc_url, digest)

    doc_indexed = self.indexing_client.get_by_id(doc_record["id"])
    if "url" not in doc_indexed:
      self.indexing_client.index(doc_record["id"], doc_url, document_text)

    # Re-index when the page content has changed since the last crawl.
    if doc_record["digest"] != digest:
      self.document_client.update_digest(doc_record["id"], digest)
      self.indexing_client.update_content(doc_record["id"], document_text)

    # Record the link graph and enqueue child URLs, up to MAX_URL in total.
    for link in document_links:
      if self.url_counter < Crawler.MAX_URL:
        self.url_counter += 1
        child_doc_record = self.document_client.get_by_url(link.geturl())
        if "id" not in child_doc_record:
          child_doc_record = self.document_client.create(link.geturl(), "digest")
        self.document_client.create_link(doc_record["id"], child_doc_record["id"])
        self.producer.publish(link.geturl())

    self.pagerank_client.update(doc_record["id"])
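
The crawler delegates fetching and parsing to a WebScraper helper that is not shown here. A minimal sketch, assuming requests and BeautifulSoup, follows; get_links returns urllib.parse results so that link.geturl() works as used above.

from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup


class WebScraper:
    """Assumed scraping helper; names mirror the calls made by Crawler."""

    @staticmethod
    def get_text(url):
        # Fetch the page and strip the markup down to visible text.
        html = requests.get(url, timeout=10).text
        return BeautifulSoup(html, "html.parser").get_text(separator=" ", strip=True)

    @staticmethod
    def get_links(url):
        # Resolve every <a href> against the page URL and return parse results.
        html = requests.get(url, timeout=10).text
        soup = BeautifulSoup(html, "html.parser")
        return [urlparse(urljoin(url, a["href"]))
                for a in soup.find_all("a", href=True)]
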
Example #3
def publish():
    try:
        # Build a short-lived producer for this request and publish the payload from the JSON body.
        producer = Producer(bootstrap_servers, url, topic)
        values = request.json["val"]
        response = producer.publish(key=values, values=values)
        del producer
        return response
    except Exception as e:
        return format(e)
Example #4
def test_keep_alive():
    # Create the producer
    p = Producer(ip=g_ip, user=g_user, password=g_password)
    p.producer_declare()
    p.create_exchange(g_exchange, "topic")

    # Create the consumer
    c = Consumer(ip=g_ip, user=g_user, password=g_password)
    c.start_consumer(g_exchange, "test1", "test1")

    time.sleep(5)  # wait 5 seconds for the queue to be ready

    # Keep the connection idle by sending no messages
    log.info("[test_keep_alive] start sending nothing test ...")
    secs = 0
    while secs < g_test_secs:
        time.sleep(5)
        secs += 5

    try:
        # Publish one message to check whether the connection is still usable
        log.info("[test_keep_alive] test connection alive???")
        p.publish(g_exchange, "docx2pdf", '{"msg":"this is a test!"}')
        log.info("[test_keep_alive] connection alive!!!")
        log.info("[test_keep_alive] start sending msg test ...")
        secs = 0
        while secs < g_test_secs:
            time.sleep(1)
            p.publish(g_exchange, "test1", '{"msg":"this is a test!"}')
            secs += 1
    except Exception as err:
        log.error("[test_keep_alive] error: " + str(err))
        log.error("exit [test_keep_alive]")
    finally:
        p.publish(g_exchange, "test1", "quit")
        p.close()  # close the producer connection
        c.join()  # wait for the consumer thread to finish
Example #5
import time
from producer import Producer

KAFKA_BROKER = 'kafka:9092'
KAFKA_TOPIC = 'thing-data'
KAFKA_GROUP = 'thing-cockpit'

if __name__ == "__main__":
    producer = Producer(KAFKA_BROKER)
    while True:
        # Sample payload; the timestamp is a fixed value in milliseconds since the epoch.
        data = {
            "timestamp": 1593834720000,
            "thing_id": 1,
            "company_id": 2,
            "data": {
                "production": 100,
            },
        }
        producer.publish(KAFKA_TOPIC, data)
        time.sleep(1)
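
The producer module imported by this script is not included on this page. One minimal version, assuming the kafka-python client and JSON-serialized values, could look like this:

import json

from kafka import KafkaProducer


class Producer:
    """Assumed kafka-python wrapper matching Producer(broker).publish(topic, data)."""

    def __init__(self, bootstrap_servers):
        self._producer = KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            value_serializer=lambda v: json.dumps(v).encode("utf-8"))

    def publish(self, topic, data):
        # send() is asynchronous; flush() blocks until the message is delivered.
        self._producer.send(topic, data)
        self._producer.flush()

Note that Example #3 constructs its Producer with a different signature (bootstrap_servers, url, topic) and publishes with key/values, so it presumably wraps a different client setup; the sketch above only covers the loop in this example.
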
Example #6
#!/usr/bin/env python
from producer import Producer
from crawler import Crawler

# Seed the queue with a starting URL, then let the crawler consume from it.
producer = Producer('url_queue')
producer.publish('https://makeitreal.camp/')

crawler = Crawler()
crawler.run()
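
Examples #2 and #6 rely on yet another Producer/Consumer pair, addressed by a single queue name. A minimal sketch, assuming pika against a local RabbitMQ broker (host, queue settings, and acknowledgement mode are assumptions), could be:

import pika


class Producer:
    def __init__(self, queue_name):
        self._queue = queue_name
        self._connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
        self._channel = self._connection.channel()
        self._channel.queue_declare(queue=queue_name)

    def publish(self, message):
        # Publish through the default exchange straight to the named queue.
        self._channel.basic_publish(exchange="", routing_key=self._queue, body=message)


class Consumer:
    def __init__(self, queue_name):
        self._queue = queue_name
        self._connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
        self._channel = self._connection.channel()
        self._channel.queue_declare(queue=queue_name)

    def subscribe(self, callback):
        # Block and hand every message to callback(ch, method, properties, body).
        self._channel.basic_consume(queue=self._queue,
                                    on_message_callback=callback,
                                    auto_ack=True)
        self._channel.start_consuming()

With wrappers like these, crawler.run() blocks in start_consuming() and processes every URL published to url_queue, including the ones the crawler re-publishes for child links.
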