import time

from function_scheduling_distributed_framework import task_deco, BrokerEnum, run_consumer_with_multi_process
from nb_log import stdout_write, print_raw


@task_deco('test_rabbit_queue', broker_kind=BrokerEnum.RABBITMQ_AMQPSTORM, qps=1000,
           is_using_distributed_frequency_control=True, log_level=10)
def test_fun(x):
    """Echo the consumed message.

    The 1000-qps limit is distributed: it is shared across every consumer
    process of this queue, not applied per process.
    """
    print(x)
    # time.sleep(20)


if __name__ == '__main__':
    # Launch one consumer process for test_fun.
    run_consumer_with_multi_process(test_fun, 1)
import time

from function_scheduling_distributed_framework import task_deco, BrokerEnum, ConcurrentModeEnum, run_consumer_with_multi_process

# Multi-process demo written once to run on both Windows and Linux:
# several worker processes are started, each stacking threaded concurrency.


@task_deco('test_multi_process_queue', broker_kind=BrokerEnum.REDIS,
           concurrent_mode=ConcurrentModeEnum.THREADING, log_level=20)
def fff(x):
    """No-op consumer used purely to benchmark message throughput."""
    pass
    # print(x * 10, os.getpid())


if __name__ == '__main__':
    # fff.consume()
    # Start 16 processes at once, each running a thread pool internally.
    run_consumer_with_multi_process(fff, 16)
import os
import threading
import time

import requests

from function_scheduling_distributed_framework import task_deco, BrokerEnum, ConcurrentModeEnum, run_consumer_with_multi_process

# Multi-process + multi-thread consumer demo, written once to run on both
# Windows and Linux; 16 processes each stacking threaded concurrency.


@task_deco('test_multi_process_queue', broker_kind=BrokerEnum.REDIS_ACK_ABLE,
           concurrent_mode=ConcurrentModeEnum.THREADING, qps=10)
def fff(x):
    """Fetch a local URL and report which process/thread handled the message."""
    # resp = requests.get('http://www.baidu.com/content-search.xml')
    resp = requests.get('http://127.0.0.1')
    print(x, os.getpid(), threading.get_ident(), resp.text[:5])


if __name__ == '__main__':
    # for i in range(10000):
    #     fff.push(i)
    # fff.consume()  # single-process multi-thread/coroutine consumption
    # Stacking processes on top of threads makes this a high-speed crawler
    # pattern that exploits both IO and CPU on multi-core machines.
    run_consumer_with_multi_process(fff, 4)
# NOTE(review): this chunk relies on names imported earlier in the file
# (task_deco, BrokerEnum, ConcurrentModeEnum, run_consumer_with_multi_process),
# which are not visible in this excerpt.
@task_deco(
    '20000',
    broker_kind=BrokerEnum.REDIS,
    concurrent_num=2,
    log_level=20,
    qps=0,
    concurrent_mode=ConcurrentModeEnum.SINGLE_THREAD,
)
def f_test_speed(x):
    """Throughput-test consumer: simply echo the consumed message."""
    pass
    # logger.debug(x)
    # f_test_speed2.push(x * 10)
    print(x)
    # time.sleep(20)


# @task_deco('speed_test_queue2', broker_kind=BrokerEnum.REDIS, log_level=20, qps=2)
# def f_test_speed2(y):
#     pass
#     # print(y)


if __name__ == '__main__':
    # f_test_speed.clear()
    # for i in range(1000000):
    #     f_test_speed.push(i)
    # f_test_speed.consume()
    run_consumer_with_multi_process(f_test_speed, 1)
    # f_test_speed2.consume()
import random
import time

from function_scheduling_distributed_framework import task_deco, BrokerEnum, run_consumer_with_multi_process, ConcurrentModeEnum


@task_deco('test_queue66', broker_kind=BrokerEnum.HTTPSQS, qps=2, log_level=10,
           is_print_detail_exception=False, is_show_message_get_from_broker=False)
def f(x, y):
    """Print the addition of the two message fields and return the sum."""
    print(f''' {x} + {y} = {x + y}''')
    return x + y


if __name__ == '__main__':
    # f.consume()
    run_consumer_with_multi_process(f, 1)
# NOTE(review): this chunk depends on names imported earlier in the file
# (task_deco, BrokerEnum, time, PriorityConsumingControlConfig,
# run_consumer_with_multi_process) -- not visible in this excerpt.
@task_deco('test_queue', broker_kind=BrokerEnum.REDIS)
def ff(x, y):
    # Consumer: sleep 10 seconds, then print the worker pid and message fields.
    import os
    time.sleep(10)
    print(os.getpid(), x, y)


if __name__ == '__main__':
    # ff.publish()
    ff.clear()  # purge the queue
    for i in range(1000):
        ff.push(i, y=i * 2)

        # publish() is the richer counterpart of push(): the first argument is the
        # function's kwargs as a dict, the following arguments are task-control
        # options, e.g. setting a task_id, a delayed (countdown) task, or rpc mode.
        ff.publish({
            'x': i * 10,
            'y': i * 2
        }, priority_control_config=PriorityConsumingControlConfig(
            countdown=1, misfire_grace_time=1))

    ff(666, 888)  # call the function directly, bypassing the queue

    ff.start()  # equivalent to consume()
    ff.consume()  # equivalent to start()
    run_consumer_with_multi_process(ff, 2)  # start two consumer processes
    ff.multi_process_start(
        2
    )  # start two processes; same effect as run_consumer_with_multi_process above --
    # multi_process_start is the newer method.
    # IdeAutoCompleteHelper(ff).multi_process_start(3)  # IdeAutoCompleteHelper adds IDE completion,
    # but the decorator now carries type annotations, so "ff." already completes in PyCharm.
crawl_detail_page.push(url_detail, title=title, news_type=news_type) # 发布详情页任务 if do_page_turning: last_page = int( sel.css('#channelPage > a:nth-child(12)::text').extract_first()) for p in range(2, last_page + 1): crawl_list_page.push(news_type, p) # 列表页翻页。 @task_deco('car_home_detail', broker_kind=BrokerEnum.REDIS_ACK_ABLE, concurrent_num=600, qps=5, do_task_filtering=True) def crawl_detail_page(url, title, news_type): resp_text = requests.get(url).text sel = Selector(resp_text) author = sel.css('#articlewrap > div.article-info > div > a::text').extract_first() or \ sel.css('#articlewrap > div.article-info > div::text').extract_first() or '' author = author.replace("\n", "").strip() print(f'保存数据 {news_type} {title} {author} {url} 到 数据库') # 用户自由发挥保存。 if __name__ == '__main__': # crawl_list_page('news',1) crawl_list_page.consume() # 启动列表页消费 # 这样速度更猛,叠加多进程 run_consumer_with_multi_process(crawl_detail_page, 1)
# import gevent.monkey;gevent.monkey.patch_all()
import time

from function_scheduling_distributed_framework import task_deco, BrokerEnum, run_consumer_with_multi_process, ConcurrentModeEnum

import urllib3
import requests

http = urllib3.PoolManager()


@task_deco('speed_baidu', broker_kind=BrokerEnum.REDIS, log_level=20, concurrent_num=60,
           concurrent_mode=ConcurrentModeEnum.THREADING,
           is_using_distributed_frequency_control=True, is_print_detail_exception=False)
def baidu_speed(x, ):
    """Fire a GET request at baidu as a pure throughput benchmark.

    Network failures are deliberately swallowed (best-effort benchmark), but
    only requests' own errors: the previous bare ``except:`` also swallowed
    KeyboardInterrupt/SystemExit, which made workers hard to stop.
    """
    # print(x)
    try:
        requests.request('get', 'http://www.baidu.com/content-search.xml')
    except requests.RequestException:  # was a bare except -- too broad
        pass


if __name__ == '__main__':
    run_consumer_with_multi_process(baidu_speed, 10)
    # f_test_speed2.consume()