def __init__(self):
    self.trigger = False
    self.result_queue = RedisQueue(Config.UP_QUEUE_NAME)
    self.command_queue = RedisQueue(Config.DOWN_QUEUE_NAME)
    self.port = serial.Serial("/dev/ttyS0", 9600,
                              parity=serial.PARITY_NONE,
                              stopbits=serial.STOPBITS_ONE,
                              bytesize=serial.EIGHTBITS,
                              timeout=Config.SERIAL_WAIT)
    self.start()
def __init__(self, status_queue, stop_event, config):
    super(Sender, self).__init__()
    self.normal_data_queue = RedisQueue('normal')
    self.retry_data_queue = RedisQueue('retry')
    self.status_queue = status_queue
    self.stop_event = stop_event
    self.base_url = config["api_url"]
    self.key = config["key"]
    self.store_energy_url = self.base_url + "/v2/energy"
    self.backup_file = "backup"
    self.console_mode = config["console_mode"] == "true"
    self.connected = False
def __init__(self, time_execution_in_sec, chart_title, slave, *args, **kwargs):
    super(MyTaskSet, self).__init__(time_execution_in_sec, chart_title, slave, *args, **kwargs)
    self.running = True
    self.slave = slave
    self.code = None
    self.queue_chart = RedisQueue(name="data_chart", namespace="data_chart")
    self.queue_tasks = RedisQueue(name="data_tasks", namespace="data_tasks")
    self.chart = ReportCharts(time_execution_in_sec, chart_title, self.slave)
    # self.config is not set here; it is expected to come from the base class.
    self.db = create_engine(self.config["database"]["db_string"])
def main():
    rq = RedisQueue('reddit-book-stream', host=REDIS_HOST, port=REDIS_PORT)
    reddit = praw.Reddit(user_agent=USER_AGENT,
                         client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         username=USERNAME,
                         password=PASSWORD)
def __init__(self):
    self.base_url = "https://weixin.sogou.com/weixin"
    self.keyword = KEY
    self.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;'
                  'q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': COOKIES,
        'Host': 'weixin.sogou.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/73.0.3683.86 Safari/537.36',
    }
    self.session = Session()
    self.queue = RedisQueue()
    self.mysql = Mysql()
def __init__(self):
    self.command_queue = RedisQueue(Config.DOWN_QUEUE_NAME)
    self.result_queue = RedisQueue(Config.UP_QUEUE_NAME)
    self.socket = websocket.WebSocketApp(HOST,
                                         on_open=self.on_open,
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)
    # Reconnect forever: run_forever() returns or raises when the connection
    # drops, so wait a few seconds and try again.
    while True:
        try:
            self.socket.run_forever(ping_interval=100)
        except Exception:
            pass
        time.sleep(5)
def main():
    with RecordsDB() as records_db:
        records_parser = RecordsParser(records_db)
        with CountriesDB() as countries_db:
            queue = RedisQueue(name='jobs', namespace='queue', decode_responses=True)
            job_in_json = queue.wait_and_dequeue()
            while job_in_json is not None:
                job = json.loads(job_in_json)
                country_id = job['country_id']
                country_name = countries_db.get_country_from_id(country_id)
                num_records = job['num_records']
                if country_name is None:
                    raise Exception("Country name cannot be None!")
                records_parser.get_records(country=country_name,
                                           country_id=country_id,
                                           max_records=num_records)
                job_in_json = queue.wait_and_dequeue()
def populate_job_queue():
    queue = RedisQueue('jobs')
    with CountriesDB() as countries_db:
        countries = countries_db.get_countries()
        for country in countries:
            job = {'country_id': country[0], 'num_records': 5000}
            job_in_json = json.dumps(job)
            queue.enqueue(job_in_json)
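# A quick round-trip check of the job payload format used above; the worker in
# main() earlier json.loads() the same string back into a dict. The country_id
# value 42 is only illustrative.
import json

job_in_json = json.dumps({'country_id': 42, 'num_records': 5000})
assert json.loads(job_in_json) == {'country_id': 42, 'num_records': 5000}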
def __init__(self, status_queue, config, stop_event):
    super().__init__()
    self.energy_data_queue = RedisQueue('normal')
    self.status_queue = status_queue
    self.reader = self.init_reader()
    self.solar_ip = config['solar_ip']
    self.solar_url = self.solar_ip + config['solar_url']
    self.stop_event = stop_event
    self.console_mode = config["console_mode"] == "true"
def __init__(self, name, collector):
    if SCHEDULER_PERSIST:
        # Distributed or persistent mode: back the queue with Redis
        self.queue = RedisQueue(name=name)
        # and use Redis as the deduplication container.
        self._filter_container = RedisFilterContainer()
    else:
        self.queue = Queue()
        # Otherwise a plain Python set() is enough for deduplication.
        self._filter_container = NoramlFilterContainer()
    # The collector keeps count of duplicate requests.
    self.collector = collector
def __init__(self, stop_event):
    super().__init__()
    self.energy_data_queue = RedisQueue('normal')
    self.stop_event = stop_event
    self.default_message = self.get_default_message()
    self.total_usage = random.randint(1000, 5000)
    self.total_redelivery = random.randint(1000, 5000)
    self.total_solar = random.randint(1000, 5000)
    self.total_gas = random.randint(1000, 5000)
def __init__(self, redis_mgr, service_name, custom_key, func_name,
             callback_to_main_thread=False):
    self.service_name = service_name
    self.func_name = func_name
    self.redis_queue = RedisQueue(redis_mgr)
    self.custom_key = custom_key
    self.redis_queue.subscribe(self.service_name, custom_key)
    self.callback_to_main_thread = callback_to_main_thread
def __init__(self, thread_index, global_network):
    self.thread_index = thread_index
    self.local_network = global_network
    self.game_state = GameState()
    self.local_t = 0
    # for log
    self.episode_reward = 0.0
    self.episode_start_time = 0.0
    self.prev_local_t = 0
    self.rq = RedisQueue(REDIS_QUEUE_NAME)
def main():
    comment_queue = RedisQueue('reddit-book-stream', host=REDIS_HOST, port=REDIS_PORT)
    reddit = praw.Reddit(user_agent=USER_AGENT,
                         client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         username=USERNAME,
                         password=PASSWORD)
    # Stream comments from r/all; pause_after < 0 allows faster streaming.
    for comment in reddit.subreddit('all').stream.comments(pause_after=-1):
        if comment and mentions_book(comment.body):
            comment_queue.put(comment.id)
            print(comment.id)
            print(f'reddit.com/api/info?id=t1_{comment.id}')
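# A minimal downstream consumer sketch for the stream above, not part of the
# original project. It assumes RedisQueue exposes a blocking get_wait() that
# returns a decoded string or None (as in output.py later in this collection);
# reddit.comment(id=...) lazily fetches the queued comment via PRAW.
def consume():
    comment_queue = RedisQueue('reddit-book-stream', host=REDIS_HOST, port=REDIS_PORT)
    reddit = praw.Reddit(user_agent=USER_AGENT,
                         client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         username=USERNAME,
                         password=PASSWORD)
    while True:
        comment_id = comment_queue.get_wait()
        if comment_id is None:
            continue  # queue stayed empty for the whole timeout; poll again
        comment = reddit.comment(id=comment_id)
        print(comment.body)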
def __init__(self):
    self.device = '/gpu:0' if USE_GPU else '/cpu:0'
    self.stop_requested = False
    self.global_t = 0

    if USE_LSTM:
        self.global_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN, ACTION_DIM, self.device, -1)
    else:
        self.global_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM, self.device)
    self.global_network.create_loss(ENTROPY_BETA)

    self.initial_learning_rate = log_uniform(INITIAL_ALPHA_LOW, INITIAL_ALPHA_HIGH,
                                             INITIAL_ALPHA_LOG_RATE)
    print('initial_learning_rate:', self.initial_learning_rate)
    self.learning_rate_input = tf.placeholder('float')
    self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_input,
                                               decay=RMSP_ALPHA,
                                               momentum=0.0,
                                               epsilon=RMSP_EPSILON)
    grads_and_vars = self.optimizer.compute_gradients(self.global_network.total_loss,
                                                      self.global_network.get_vars())
    self.apply_gradients = self.optimizer.apply_gradients(grads_and_vars)

    self.actor_threads = []
    for i in range(PARALLEL_SIZE):
        actor_thread = A3CActorThread(i, self.global_network)
        self.actor_threads.append(actor_thread)

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.initialize_all_variables())

    self.reward_input = tf.placeholder(tf.float32)
    tf.scalar_summary('reward', self.reward_input)
    self.time_input = tf.placeholder(tf.float32)
    tf.scalar_summary('living_time', self.time_input)
    self.summary_op = tf.merge_all_summaries()
    self.summary_writer = tf.train.SummaryWriter(LOG_FILE, self.sess.graph)

    self.saver = tf.train.Saver()
    self.restore()

    self.lock = threading.Lock()
    self.rq = RedisQueue(REDIS_QUEUE_NAME)
    self.train_count = 0
async def main():
    msg = "stockx crawler starting!"
    print(msg)
    logging.info(msg)
    q = RedisQueue('rq')
    # Create the client session shared by all requests.
    async with aiohttp.ClientSession() as client:
        for k, v in URL.items():
            for page in range(1, 25):
                api_url = DOMAIN + v + str(page)
                task = asyncio.create_task(spiderList(client, api_url, q))
                await asyncio.sleep(10)
                done, pending = await asyncio.wait({task})
                if task in done:
                    print('[crawl finished] all crawl tasks have completed')
                    logging.info('[crawl finished] all crawl tasks have completed')
async def main(loop):
    print("starting crawler")
    # Wait for the MySQL connection pool to be ready.
    pool = await aiomysql.create_pool(host=conf.database['host'],
                                      port=conf.database['port'],
                                      user=conf.database['user'],
                                      password=conf.database['passwd'],
                                      db=conf.database['db'],
                                      loop=loop)
    q = RedisQueue('rq')
    for k, v in URL.items():
        for page in range(30):
            api_url = DOMAIN + v + str(page)
            task = asyncio.create_task(spiderList(pool, api_url, q))
            await asyncio.sleep(1)
            done, pending = await asyncio.wait({task})
            if task in done:
                print('[crawl finished] all crawl tasks have completed')
                logging.info('[crawl finished] all crawl tasks have completed')
def test_failure(self):
    qkey = 'test:failure:queue'
    tid = '12'
    rq = RedisQueue(self.rc, 1, 2)
    self.rc.lpush(qkey, tid)
    tid0 = rq.safe_pop(qkey)
    self.assertEqual(tid, tid0)
    ## Popping another task too fast, before the task timeout has been
    ## reached.
    tid1 = rq.safe_pop(qkey)
    self.assertIsNone(tid1)
    ## Supposing the worker had died before finishing the task, we can take
    ## it again after the task timeout.
    time.sleep(2)
    tid2 = rq.safe_pop(qkey)
    self.assertEqual(tid, tid2)
    ## Marking the task as done should make it impossible to retrieve the
    ## same task.
    rq.mark_done(qkey, tid2)
    time.sleep(2)
    tid3 = rq.safe_pop(qkey)
    self.assertIsNone(tid3)
    self.rc.delete(qkey, "%s:done" % qkey)
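# A minimal sketch of the queue semantics the test above exercises:
# at-least-once delivery with a lease that expires after task_timeout seconds.
# This is NOT the project's implementation. It assumes a redis-py 3.x client
# created with decode_responses=True; the ":pending" sorted set is an invented
# detail (the test only fixes the ":done" key).
import time

class RedisQueue(object):
    def __init__(self, rc, poll_interval, task_timeout):
        self.rc = rc
        self.poll_interval = poll_interval  # unused in this sketch
        self.task_timeout = task_timeout

    def safe_pop(self, qkey):
        pending = qkey + ':pending'  # sorted set: task id -> lease timestamp
        done = qkey + ':done'        # set of finished task ids
        now = time.time()
        # Re-lease a task whose previous lease expired without being marked done.
        for tid in self.rc.zrangebyscore(pending, 0, now - self.task_timeout):
            if self.rc.sismember(done, tid):
                self.rc.zrem(pending, tid)
            else:
                self.rc.zadd(pending, {tid: now})
                return tid
        # Otherwise lease a fresh task from the list.
        tid = self.rc.rpop(qkey)
        if tid is not None:
            self.rc.zadd(pending, {tid: now})
        return tid

    def mark_done(self, qkey, tid):
        self.rc.sadd(qkey + ':done', tid)
        self.rc.zrem(qkey + ':pending', tid)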
def push_to_queue(queue_name, items):
    queue = RedisQueue(queue_name)
    for item in items:
        queue.put(item)
# -*- coding:utf-8 -*-
__author__ = '张全亮'

import requests
import urllib3
import math
import time
import datetime
from multiprocessing.dummy import Pool
import hashlib

urllib3.disable_warnings()
from logger import Logger
from redis_queue import RedisQueue

yz = RedisQueue('yz')
yz_rec = RedisQueue('yz_rec')
logger = Logger()

"""Order check rule: scan orders page by page and stop paging once a matching order is found."""


def check_pay(order_sn, pdduid, kdtsessionid):
    cookie = 'KDTSESSIONID={}'.format(kdtsessionid)
    first_url = 'https://h5.youzan.com/v2/trade/order/list.json?perpage=20&page=1&type=all'
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36",
        'Cookie': cookie
    }
    res = requests.get(first_url, headers=headers, verify=False)
    if '页面已被删除' in res.text:  # literal means "the page has been deleted"
#!/usr/bin/env python
# coding:utf-8
# Copyright (C) dirlt

from redis_queue import RedisQueue

command_queue = RedisQueue('command')
command_queue.put('trigger')
import datetime
from mysql_db import db_insert
from flask import Flask, jsonify, request, redirect, render_template
from redis_queue import RedisQueue

app = Flask(__name__)

# Order placement
from pdd_spider import pdd_main
from yz_spider import yz_main
# Order querying
from pdd_query import pdd_pass_query
from yz_query import yz_pass_query

pdd = RedisQueue('pdd')
yz = RedisQueue('yz')

"""Pinduoduo order-placing crawler"""
def pdd_spider(pdduid, accesstoken, goods_url, amount, order_number):
    result = pdd_main(pdduid, accesstoken, goods_url, amount, order_number)
    return result

"""Youzan order-placing crawler"""
def yz_spider(pdduid, kdtsessionid, goods_url, amount, order_number):
    result = yz_main(pdduid, kdtsessionid, goods_url, amount, order_number)
    return result
def add_job_to_queue(country_id, num_records):
    queue = RedisQueue('jobs')
    job = {'country_id': country_id, 'num_records': num_records}
    job_in_json = json.dumps(job)
    queue.enqueue(job_in_json)
# -*- coding:utf-8 -*-
from redis_queue import RedisQueue
import time

q = RedisQueue('rq')
while True:
    result = q.get_wait()
    if not result:
        break
    print("output.py: data {} out of queue {}".format(result, time.strftime("%c")))
    time.sleep(2)
def __init__(self, redis_mgr):
    self.redis_mgr = redis_mgr
    self.queue = RedisQueue(redis_mgr)
# -*- coding:utf-8 -*-
from redis_queue import RedisQueue
import time

q = RedisQueue('rq')  # create a queue named rq
for i in ["a", "b", "c", "d", "e", "f"]:
    q.put(i)
    print("input.py: data {} enqueue {}".format(i, time.strftime("%c")))
    time.sleep(1)
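# A minimal sketch of the redis_queue.RedisQueue helper that input.py above and
# output.py earlier both assume: a FIFO built on a Redis list. The 'queue'
# namespace default and the 5-second get_wait timeout are assumptions, not the
# project's confirmed values.
import redis

class RedisQueue(object):
    def __init__(self, name, namespace='queue', **redis_kwargs):
        redis_kwargs.setdefault('decode_responses', True)
        self.db = redis.Redis(**redis_kwargs)
        self.key = '{}:{}'.format(namespace, name)

    def put(self, item):
        # Producers push onto the head of the list ...
        self.db.lpush(self.key, item)

    def get_wait(self, timeout=5):
        # ... consumers block-pop from the tail, so ordering is FIFO.
        # BRPOP returns a (key, value) pair, or None on timeout.
        item = self.db.brpop(self.key, timeout=timeout)
        return item[1] if item else None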
def __init__(self, *args, **kwargs):
    self.singleton = Singleton()
    self.queue_chart = RedisQueue(name="data_chart", namespace="data_chart")
    super(WebServerClass, self).__init__(*args, **kwargs)
# -*- coding:utf-8 -*-
__author__ = '张全亮'

import requests
import urllib3
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool

urllib3.disable_warnings()
import re
import datetime
import time
from logger import Logger
from mysql_db import db_insert
from redis_queue import RedisQueue

pdd = RedisQueue('pdd')
pdd_rec = RedisQueue('pdd_rec')
logger = Logger()

"""Automatic 5-star review"""


def evaluation(pdduid, accesstoken, goods_id, order_sn):
    url = 'https://mobile.yangkeduo.com/proxy/api/v2/order/goods/review?pdduid={}'.format(pdduid)
    cookie = 'pdd_user_id={}; PDDAccessToken={};'.format(pdduid, accesstoken)
    headers = {
        'accesstoken': accesstoken,
        'Accept': 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
        'Cookie': cookie
    }
    data = {
        "goods_id": goods_id,
class Spider():
    base_url = 'https://weixin.sogou.com/weixin'
    keyword = 'NBA'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4,zh-TW;q=0.2,mt;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'IPLOC=CN1100; SUID=6FEDCF3C541C940A000000005968CF55; SUV=1500041046435211; ABTEST=0|1500041048|v1; SNUID=CEA85AE02A2F7E6EAFF9C1FE2ABEBE6F; weixinIndexVisited=1; JSESSIONID=aaar_m7LEIW-jg_gikPZv; ld=Wkllllllll2BzGMVlllllVOo8cUlllll5G@HbZllll9lllllRklll5@@@@@@@@@@; LSTMV=212%2C350; LCLKINT=4650; ppinf=5|1500042908|1501252508|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZTo1NDolRTUlQjQlOTQlRTUlQkElODYlRTYlODklOEQlRTQlQjglQTglRTklOUQlOTklRTglQTclODV8Y3J0OjEwOjE1MDAwNDI5MDh8cmVmbmljazo1NDolRTUlQjQlOTQlRTUlQkElODYlRTYlODklOEQlRTQlQjglQTglRTklOUQlOTklRTglQTclODV8dXNlcmlkOjQ0Om85dDJsdUJfZWVYOGRqSjRKN0xhNlBta0RJODRAd2VpeGluLnNvaHUuY29tfA; pprdig=ppyIobo4mP_ZElYXXmRTeo2q9iFgeoQ87PshihQfB2nvgsCz4FdOf-kirUuntLHKTQbgRuXdwQWT6qW-CY_ax5VDgDEdeZR7I2eIDprve43ou5ZvR0tDBlqrPNJvC0yGhQ2dZI3RqOQ3y1VialHsFnmTiHTv7TWxjliTSZJI_Bc; sgid=27-27790591-AVlo1pzPiad6EVQdGDbmwnvM; PHPSESSID=mkp3erf0uqe9ugjg8os7v1e957; SUIR=CEA85AE02A2F7E6EAFF9C1FE2ABEBE6F; sct=11; ppmdig=1500046378000000b7527c423df68abb627d67a0666fdcee; successCount=1|Fri, 14 Jul 2017 15:38:07 GMT',
        'Host': 'weixin.sogou.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
    }
    session = Session()
    queue = RedisQueue()
    mysql = MySQL()

    def get_proxy(self):
        try:
            r = requests.get(PROXY_POOL_URL)
            if r.status_code == 200:
                print('Get Proxy', r.text)
                return r.text
            return None
        except requests.ConnectionError:
            return None

    def start(self):
        # Initialization: apply the default headers to the whole session.
        self.session.headers.update(self.headers)
        start_url = self.base_url + '?' + urlencode({'query': self.keyword, 'type': 2})
        weixin_request = WeixinRequest(url=start_url, callback=self.parse_index, need_proxy=True)
        # Schedule the first request.
        self.queue.add(weixin_request)

    def parse_index(self, response):
        doc = pq(response.text)
        items = doc('.news-box .news-list li .txt-box h3 a').items()
        for item in items:
            url = item.attr['href']
            weixin_request = WeixinRequest(url=url, callback=self.parse_detail)
            yield weixin_request
        next = doc('#sogou_next').attr('href')
        if next:
            url = self.base_url + str(next)
            weixin_request = WeixinRequest(url=url, callback=self.parse_index, need_proxy=True)
            yield weixin_request

    def parse_detail(self, response):
        doc = pq(response.text)
        data = {
            'title': doc('.rich_media_title').text(),
            'content': doc('.rich_media_content').text(),
            'date': doc('#post-date').text(),
            'nickname': doc('#js_profile_qrcode > div > strong').text(),
            'wechat': doc('#js_profile_qrcode > div > p:nth-child(3) > span').text()
        }
        yield data

    def request(self, weixin_request):
        try:
            if weixin_request.need_proxy:
                proxy = self.get_proxy()
                if proxy:
                    proxies = {
                        'http': 'http://' + proxy,
                        'https': 'https://' + proxy
                    }
                    return self.session.send(weixin_request.prepare(),
                                             timeout=weixin_request.timeout,
                                             allow_redirects=False,
                                             proxies=proxies)
            return self.session.send(weixin_request.prepare(),
                                     timeout=weixin_request.timeout,
                                     allow_redirects=False)
        except (ConnectionError, ReadTimeout) as e:
            print(e.args)
            return False

    def error(self, weixin_request):
        weixin_request.fail_time += 1
        print('Request Failed', weixin_request.fail_time, 'Times', weixin_request.url)
        if weixin_request.fail_time < MAX_FAILED_TIME:
            self.queue.add(weixin_request)

    def schedule(self):
        while not self.queue.empty():
            weixin_request = self.queue.pop()
            callback = weixin_request.callback
            print('Schedule', weixin_request.url)
            r = self.request(weixin_request)
            if r and r.status_code in VALID_STATUSES:
                results = list(callback(r))
                if results:
                    for result in results:
                        print('New Result', result)
                        if isinstance(result, WeixinRequest):
                            self.queue.add(result)
                        if isinstance(result, dict):
                            self.mysql.insert('articles', result)
                else:
                    self.error(weixin_request)
            else:
                self.error(weixin_request)

    def run(self):
        self.start()
        self.schedule()
def __init__(self, topic, host='localhost', port=6379, db=0):
    self.rqueue = RedisQueue(topic, 1, host=host, port=port, db=db)