def produce_consume():
    """Run one crawl cycle: validate config, drain the domain queue with a
    worker pool, optionally persist results, then sleep for the configured
    interval."""
    real_path, word_path, config_path = paths()
    check_paths(word_path, config_path)
    config = get_config(config_path)
    try:
        error = check_config(config)
    except Exception as exc:
        print(type(exc).__name__, exc)
        exit(1)
    if error is not None:
        print(error)
        exit(1)
    work_queue = Queue()
    consumer = Consumer(work_queue)
    # Sixteen daemon workers drain the queue concurrently.
    for _ in range(16):
        worker = Thread(target=consumer.consume_domains, daemon=True)
        worker.start()
    Producer(work_queue, config, word_path).get_doms()
    work_queue.join()
    if config['write_to_file']:
        print_red('writing to domains.json')
        writer_proc = Process(target=add_data,
                              args=(real_path, consumer.get_domains()))
        writer_proc.start()
    print_red('sleeping zzzzz...')
    sleep(config['interval'])
class Specialist(HospitalWorker):
    """Hospital worker that consumes and answers specialized requests.

    Every specialist must be able to process exactly two out of three
    injury types (hip / knee / elbow), one bound queue per specialization.
    """

    def __init__(self, specializations):
        """Bind a consumer queue for each specialization and start
        consuming on a background thread."""
        super().__init__()
        self._requests_consumer = Consumer('hospital', 'topic', 'localhost')
        for specialization in specializations:
            self._requests_consumer.add_queue(
                queue_name=specialization,
                routing_key='hosp.' + specialization,
                callback=self.process_request)
        self._requests_consumer.start(new_thread=True)

    def process_request(self, ch, method, properties, body):
        """Simulate an injury examination by sleeping a random number of
        seconds (1-5) and reply with a 'results' message to the queue named
        in ``properties.reply_to``."""
        text = body.decode()
        request_id = properties.correlation_id
        reply_queue = properties.reply_to
        print(colored('processing request: ' + text
                      + ' (request id: ' + request_id[:8] + ')', 'green'),
              end='', flush=True)
        # One dot per simulated second of work.
        for _ in range(randint(1, 5)):
            print(colored('.', 'green'), end='', flush=True)
            sleep(1)
        print('')
        reply_opts = {
            'properties': pika.BasicProperties(correlation_id=request_id)
        }
        self._producer.send_message(routing_key=reply_queue,
                                    message=text + ' done',
                                    **reply_opts)
        ch.basic_ack(delivery_tag=method.delivery_tag)
def __init__(self, specializations):
    """Set up the requests consumer: one queue per specialization, each
    routed via 'hosp.<spec>', then start consuming on a background thread."""
    super().__init__()
    consumer = Consumer('hospital', 'topic', 'localhost')
    self._requests_consumer = consumer
    for specialization in specializations:
        consumer.add_queue(queue_name=specialization,
                           routing_key='hosp.' + specialization,
                           callback=self.process_request)
    consumer.start(new_thread=True)
def crawl2db(getsession):
    """Run one focus-video update round against the database.

    A single shared connection is unreliable across threads, so a pool of
    sessions is created — one per consumer thread.  The video API is
    strictly rate-limited; raise ``sleepsec`` if requests start failing.
    """
    global update_round
    started_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("now start update round %d at %s" % (update_round, started_at))
    work_queue = Queue()
    aids = get_update_aids()
    print("try update videos with aids " + str(aids))
    threads = [Producer3(work_queue, video_aids=aids,
                         func=lambda x: (x, ), sleepsec=0.5)]
    consumer_count = 4  # one DB session per consumer thread
    sessions = [getsession() for _ in range(consumer_count)]
    for session in sessions:
        threads.append(Consumer(work_queue, session=session,
                                func=TddUpdateFocusVideo.store_video,
                                sleepsec=0.5))
    with Timer() as timer:
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    for session in sessions:
        session.close()
    print("update round %d finished, runtime: %s"
          % (update_round, timer.elapsed))
    update_round += 1
def __init__(self):
    """Create the results consumer and the bookkeeping structures for
    requests that are still awaiting an answer."""
    super().__init__()
    consumer = Consumer('hospital', 'topic', 'localhost')
    self._results_consumer = consumer
    self._results_queue = consumer.add_queue(callback=self.process_results)
    consumer.start(new_thread=True)
    # Unique ids of requests sent but not yet processed; the lock guards
    # concurrent access from the consumer thread.
    self._pending_requests = set()
    self._requests_lock = threading.Lock()
def crawl2csv(filename, start, end):
    """Crawl video lists for uploader mids between ``start`` and ``end``
    and write the rows to ``filename`` as CSV.

    NOTE: ``sleepsec`` could be drawn from a normal distribution over a
    range for a more natural request rhythm.
    """
    work_queue = Queue()
    with open(filename, 'w', encoding='utf8', newline='') as fwriter:
        writer = csv.writer(fwriter)
        threads = [Producer(work_queue, start=start, end=end,
                            func=BiliUser.getVideoList, sleepsec=0.1)]
        consumer_count = 4  # number of consumer threads
        for _ in range(consumer_count):
            threads.append(Consumer(work_queue, csvwriter=writer,
                                    func=BiliVideo.store_video_simpleajax,
                                    sleepsec=0.01))
        with Timer() as timer:
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
    print('runtime - (%i_%i) - : %s' % (start, end, timer.elapsed))
    print('======= All Done! =======')
def crawl2db(getsession, start, end):
    """Crawl video lists for uploader mids between ``start`` and ``end``
    and store them in the database.

    Sharing one connection across threads causes problems, so a session
    pool is built with one session per consumer thread.  The video API is
    strictly rate-limited; raise ``sleepsec`` if requests start failing.
    """
    work_queue = Queue()
    threads = [Producer(work_queue, start=start, end=end,
                        func=BiliUser.getVideoList, sleepsec=0.1)]
    consumer_count = 4  # one DB session per consumer thread
    sessions = [getsession() for _ in range(consumer_count)]
    for session in sessions:
        threads.append(Consumer(work_queue, session=session,
                                func=BiliVideo.store_video_simpleajax,
                                sleepsec=0.01))
    with Timer() as timer:
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    for session in sessions:
        session.close()
    print('runtime - (%i_%i) - : %s' % (start, end, timer.elapsed))
    print('======= All Done! =======')
class Administrator:
    """Logs every message sent within the whole system and may broadcast
    messages to all other workers.

    Subscribes to the entire 'hospital' topic space (routing key '#') and
    owns a fanout producer for broadcasts.
    """

    def __init__(self):
        consumer = Consumer('hospital', 'topic', 'localhost')
        self._log_consumer = consumer
        self._log_queue = consumer.add_queue(routing_key='#',
                                             callback=self.process_log)
        consumer.start(new_thread=True)
        self._info_producer = Producer('info', 'fanout', 'localhost')

    def send_info(self, message):
        """Broadcast ``message`` to every worker via the fanout exchange."""
        print('sending info: ', message)
        self._info_producer.send_message(message=message)

    def process_log(self, ch, method, properties, body):
        """Print the received message as a yellow LOG line and ack it."""
        text = body.decode()
        print(colored('LOG: ' + text, 'yellow'))
        ch.basic_ack(delivery_tag=method.delivery_tag)
def crawl2csv(filename, start, end):
    """Crawl videos for aids between ``start`` and ``end`` and write the
    rows to ``filename`` as CSV.

    NOTE: ``sleepsec`` could be drawn from a normal distribution over a
    range for a more natural request rhythm.
    """
    work_queue = Queue()
    with open(filename, 'w', encoding='utf8', newline='') as fwriter:
        writer = csv.writer(fwriter)
        threads = [Producer(work_queue, start=start, end=end,
                            func=lambda aid: (aid, ), sleepsec=0.5)]
        consumer_count = 4  # number of consumer threads
        for _ in range(consumer_count):
            threads.append(Consumer(work_queue, csvwriter=writer,
                                    func=BiliVideo.store_video,
                                    sleepsec=0.5))
        with Timer() as timer:
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
    print('runtime: %s' % timer.elapsed)
    print('======= All Done! ======')
def crawl2db(getsession, start, end):
    """Crawl videos for aids between ``start`` and ``end`` and store them
    in the database.

    Sharing one connection across threads causes problems, so one session
    is created per consumer thread.  The video API is strictly
    rate-limited; raise ``sleepsec`` if requests start failing.
    """
    work_queue = Queue()
    threads = [Producer(work_queue, start=start, end=end,
                        func=lambda aid: (aid,), sleepsec=0.5)]
    consumer_count = 4  # one DB session per consumer thread
    sessions = [getsession() for _ in range(consumer_count)]
    for session in sessions:
        threads.append(Consumer(work_queue, session=session,
                                func=TddAddFocusVideo.store_video,
                                sleepsec=0.5))
    with Timer() as timer:
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    for session in sessions:
        session.close()
    print('runtime: %s' % timer.elapsed)
    print('======= All Done! ======')
class Medic(HospitalWorker):
    """Creates and sends specialized examination requests and receives the
    results.

    Every request must be one of the following types:
    - hip examination
    - knee examination
    - elbow examination
    """

    def __init__(self):
        """Set up the results consumer and the bookkeeping for requests
        that are still awaiting an answer."""
        super().__init__()
        consumer = Consumer('hospital', 'topic', 'localhost')
        self._results_consumer = consumer
        self._results_queue = consumer.add_queue(
            callback=self.process_results)
        consumer.start(new_thread=True)
        # Unique ids of requests sent but not yet processed; guarded by the
        # lock because the consumer thread mutates the set concurrently.
        self._pending_requests = set()
        self._requests_lock = threading.Lock()

    def send_request(self, patient_name, injury_type):
        """Publish a new request to the matching injury queue and record
        its unique id in the pending set."""
        request_id = str(uuid4())
        message = patient_name + ' ' + injury_type
        message_opts = {
            'properties': pika.BasicProperties(
                reply_to=self._results_queue,
                correlation_id=request_id)
        }
        with self._requests_lock:
            self._pending_requests.add(request_id)
        self._producer.send_message('hosp.' + injury_type, message,
                                    **message_opts)
        print(colored('sending request: ' + message
                      + ' (request id: ' + request_id[:8] + ')', 'blue'))

    def process_results(self, ch, method, properties, body):
        """Remove the result's request id from the pending set; log the
        result only if the id was actually pending."""
        text = body.decode()
        request_id = properties.correlation_id
        with self._requests_lock:
            was_pending = request_id in self._pending_requests
            if was_pending:
                self._pending_requests.remove(request_id)
        if was_pending:
            print(colored('received results: ' + text
                          + ' (request id: ' + request_id[:8] + ')', 'green'))
        ch.basic_ack(delivery_tag=method.delivery_tag)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/11/7 3:48 PM
# @Author  : yinxin<*****@*****.**>
# @Site    :
# @File    : application
# @Software: PyCharm
from utils import Consumer, send_data, crawl_url
from common import delay
import time

# Shared consumer instance for this process.
consumer = Consumer.get_consumer()


def main():
    """Take one URL from the consumer, crawl it, and ship the HTML onward."""
    message = consumer.get_msg()
    url = message.value.decode()
    html = crawl_url(url)
    send_data(url, html)


if __name__ == '__main__':
    # Poll forever, pausing ``delay`` seconds between iterations.
    while True:
        main()
        time.sleep(delay)
def __init__(self):
    """Subscribe to every routing key ('#') for logging and prepare the
    fanout producer used for info broadcasts."""
    consumer = Consumer('hospital', 'topic', 'localhost')
    self._log_queue = consumer.add_queue(routing_key='#',
                                         callback=self.process_log)
    self._log_consumer = consumer
    consumer.start(new_thread=True)
    self._info_producer = Producer('info', 'fanout', 'localhost')