def run(self, params):
    hkey = params.get('hkey', '').strip()
    max_num = params.get('max_num', 100)
    min_num = params.get('min_num', 1)
    ratio = params.get('ratio', 0.1)
    timer_seq = params.get('ts', 60)
    timer_num = params.get('tn', 1)

    total = connections.redis.hlen(hkey)
    # Sample a ratio of the hash, clamped into [min_num, max_num].
    num = int(round(total * ratio))
    num = num if min_num <= num <= max_num else (
        max_num if num > max_num else min_num)

    # Proxies whose score has dropped to -10 or below are retry candidates.
    drop_list = [
        k for k, v in connections.redis.hscan_iter(hkey, count=100)
        if int(v) <= -10
    ]
    # random.sample raises ValueError when the sample size exceeds the
    # population, so cap it at len(drop_list).
    retry_list = random.sample(drop_list, min(num, len(drop_list)))

    timestamp = int(time.time())
    task_map = {}
    for proxy_str in retry_list:
        host, port = proxy_str.split(':', 1)
        port = int(port)
        for t_idx in range(timer_num):
            # crc32_mod spreads proxies evenly over the timer window so
            # the checks don't all fire on the same tick.
            next_tick = timestamp + pyutils.crc32_mod(
                proxy_str, timer_seq) + t_idx * timer_seq
            rawparam = '%s#%d#%d#%d' % (host, port, timer_seq,
                                        int(next_tick / timer_seq))
            task_map.setdefault(rawparam, next_tick)
    queue_raw_jobs('check_proxy_timed_set', task_map)
    return {'num': len(retry_list), 'total': total}
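# The scheduling above assumes pyutils.crc32_mod maps a string uniformly
# into [0, timer_seq). A minimal sketch of such a helper, assuming it is
# plain CRC32 folded by the modulus (the real pyutils may differ):

import zlib

def crc32_mod(s, mod):
    # Mask to an unsigned 32-bit value first (zlib.crc32 can return a
    # signed int on Python 2), then fold into [0, mod).
    return (zlib.crc32(s.encode('utf-8')) & 0xffffffff) % mod

# e.g. crc32_mod('1.2.3.4:8080', 60) is a stable offset in 0..59, so the
# same proxy always lands on the same slot of a 60-second window.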
def send_raw_tasks(self, queue, params_list, start=True, block=True):
    if not self.started and start:
        self.start()
    queue_raw_jobs(queue, params_list)
    if block:
        self.wait_for_idle()
def send_raw_tasks(self, queue, params_list, start=True, block=True):
    if not self.started and start:
        self.start()
    queue_raw_jobs(queue, params_list)
    if block:
        # Wait for the queue to be empty. This can be racy: a job may sit
        # in worker memory between being dequeued and being marked
        # "started", so both checks can momentarily read zero.
        q = Queue(queue)
        while q.size() > 0 or self.mongodb_jobs.mrq_jobs.find(
                {"status": "started"}).count() > 0:
            time.sleep(0.1)
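# A hedged usage sketch, assuming an MRQ test fixture exposes this helper
# (the fixture and queue names below are illustrative, not from the source):

def test_square_raw(worker):
    # Queues three raw payloads, then blocks until both the Redis queue
    # and the MongoDB "started" set have drained.
    worker.send_raw_tasks("square_raw", [1, 2, 3])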
def run(self, params):
    filename = params.get('f', '').strip()
    if _check_t(params):
        abort_current_job()

    timer_num = 3
    timer_seq = CONF_CHECK_INTERVAL

    gql = pyfile.load_str(filename).strip()
    if not gql:
        abort_current_job()

    # Fetch the page through a random proxy from the healthy set.
    proxy_list, gret = run_gdom_page(
        gql,
        get_proxy=lambda: connections.redis.srandmember(CONF_DATA_OK_KEY))
    if proxy_list:
        log.info('FETCH OK filename:%s, num:%d' % (filename, len(proxy_list)))
    if gret.errors:
        log.error('FETCH ERROR filename:%s, errors:%s' %
                  (filename, gret.errors))
    if not proxy_list:
        log.error('FETCH EMPTY filename:%s, gret:%r' % (filename, gret))
        abort_current_job()

    timestamp = int(time.time())
    task_map = {}
    for proxy_str in proxy_list:
        host, port = proxy_str.split(':', 1)
        port = int(port)
        for t_idx in range(timer_num):
            next_tick = timestamp + pyutils.crc32_mod(
                proxy_str, timer_seq) + t_idx * timer_seq
            rawparam = '%s#%d#%d#%d' % (host, port, timer_seq,
                                        int(next_tick / timer_seq))
            task_map.setdefault(rawparam, next_tick)
    queue_raw_jobs('check_proxy_timed_set', task_map)
    return {
        'file': filename,
        'num': len(proxy_list),
        'proxy_list': proxy_list
    }
def run(self, params):
    dirname = params.get('p', '').strip()
    timer_seq = params.get('ts', 60)
    timer_num = params.get('tn', 1)

    file_map = pyfile.get_dict_of_dir(
        dirname, filter_func=lambda s: s.endswith('.gql'))
    # Stretch the timer window in proportion to the number of files, one
    # extra interval per 100 files. Guard against a zero factor (an empty
    # directory, or Python 2 integer division, would otherwise yield
    # timer_seq == 0 and a division by zero below).
    f_seq = max(1, int(math.ceil(len(file_map) / 100.0)))
    timestamp = int(time.time())
    task_map = {}
    timer_seq = timer_seq * f_seq
    for filename in file_map:
        for t_idx in range(timer_num):
            next_tick = timestamp + pyutils.crc32_mod(
                filename, timer_seq) + t_idx * timer_seq
            rawparam = '%s#%d#%d' % (filename, timer_seq,
                                     int(next_tick / timer_seq))
            task_map.setdefault(rawparam, next_tick)
    queue_raw_jobs('fetch_proxy_timed_set', task_map)
    return {'num': len(file_map)}
#!/usr/bin/env python
import sys
import time

from mrq.context import setup_context
from mrq.job import queue_raw_jobs

setup_context()

queue = sys.argv[1]
n = int(sys.argv[2])
t = int(sys.argv[3])

# Note: `queue in ("example_timed_set")` would be a substring test against
# a plain string, so use equality instead.
if queue == "example_timed_set":
    now = time.time()
    # Schedule n tasks, one every t seconds from now.
    queue_raw_jobs(queue, {"task_%s" % _: now + (_ + 1) * t for _ in range(n)})
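# Example invocation (the script filename is hypothetical):
#   python queue_timed.py example_timed_set 10 5
# queues task_0..task_9 onto example_timed_set, scheduled 5 seconds apart.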
#!/usr/bin/env python
import sys

from mrq.context import setup_context
from mrq.job import queue_jobs, queue_raw_jobs

setup_context()

queue = sys.argv[1]
n = int(sys.argv[2])

if queue == "square":
    queue_jobs("tasks.Square", [{"n": 42} for _ in range(n)], queue=queue)
elif queue in ("square_raw", "square_nostorage_raw"):
    queue_raw_jobs(queue, [42 for _ in range(n)])
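# Example invocation (the script filename is hypothetical):
#   python queue_square.py square 100      -> 100 regular tasks.Square jobs
#   python queue_square.py square_raw 100  -> 100 raw integer payloads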