Ejemplo n.º 1
0
    def run(self, params):
        """Schedule timed re-checks for a random sample of low-scoring proxies.

        Scans the Redis hash named by ``params['hkey']``, keeps every field
        whose integer value is ``<= -10``, picks a random sample of those
        fields and enqueues raw jobs on the ``check_proxy_timed_set`` queue.

        Recognised ``params`` keys:
            hkey:    name of the Redis hash to scan (default ``''``).
            max_num: upper bound for the sample size (default 100).
            min_num: lower bound for the sample size (default 1).
            ratio:   fraction of the hash length used as the target sample
                     size before clamping (default 0.1).
            ts:      tick interval added to the current ``time.time()``
                     timestamp when spreading jobs (default 60).
            tn:      number of consecutive ticks scheduled per proxy
                     (default 1).

        Returns:
            dict: ``{'num': <proxies sampled>, 'total': <hash length>}``.
        """
        hkey = params.get('hkey', '').strip()
        max_num = params.get('max_num', 100)
        min_num = params.get('min_num', 1)
        ratio = params.get('ratio', 0.1)
        timer_seq = params.get('ts', 60)
        timer_num = params.get('tn', 1)

        total = connections.redis.hlen(hkey)

        # Target sample size: a fraction of the whole hash, clamped into
        # [min_num, max_num] (max_num wins when the bounds are inverted).
        num = int(round(total * ratio))
        if num > max_num:
            num = max_num
        elif num < min_num:
            num = min_num

        drop_list = [
            k for k, v in connections.redis.hscan_iter(hkey, count=100)
            if int(v) <= -10
        ]

        # The target is derived from the full hash length, but only the
        # filtered candidates can be sampled. random.sample() raises
        # ValueError for a sample larger than the population (or a negative
        # size), so cap the size to what is actually available.
        num = max(0, min(num, len(drop_list)))
        retry_list = random.sample(drop_list, num)

        timestamp = int(time.time())
        task_map = {}
        for proxy_str in retry_list:
            # Entries are expected to look like "host:port".
            parts = proxy_str.split(':', 1)
            host = parts[0]
            port = int(parts[1])
            for t_idx in range(timer_num):
                # Offset each proxy inside the interval by a CRC-based value
                # so the jobs are spread rather than all firing at once.
                next_tick = timestamp + pyutils.crc32_mod(
                    proxy_str, timer_seq) + t_idx * timer_seq
                rawparam = '%s#%d#%d#%d' % (host, port, timer_seq,
                                            int(next_tick / timer_seq))
                # Keep the first tick computed for an identical raw param.
                task_map.setdefault(rawparam, next_tick)

        queue_raw_jobs('check_proxy_timed_set', task_map)
        return {'num': len(retry_list), 'total': total}
Ejemplo n.º 2
0
    def send_raw_tasks(self, queue, params_list, start=True, block=True):
        """Enqueue raw jobs on ``queue`` and optionally wait for idleness.

        Args:
            queue: name of the raw queue passed to ``queue_raw_jobs``.
            params_list: raw job parameters passed to ``queue_raw_jobs``.
            start: when true and ``self.started`` is false, call
                ``self.start()`` before enqueueing.
            block: when true, call ``self.wait_for_idle()`` after enqueueing.
        """
        must_start = not self.started and start
        if must_start:
            self.start()

        queue_raw_jobs(queue, params_list)

        # Fire-and-forget callers stop here.
        if not block:
            return

        self.wait_for_idle()
Ejemplo n.º 3
0
    def send_raw_tasks(self, queue, params_list, start=True, block=True):
        """Enqueue raw jobs on ``queue`` and optionally poll until drained.

        Args:
            queue: name of the raw queue passed to ``queue_raw_jobs``.
            params_list: raw job parameters passed to ``queue_raw_jobs``.
            start: when true and ``self.started`` is false, call
                ``self.start()`` before enqueueing.
            block: when true, poll every 0.1s until the queue size is zero
                and no job document has status "started".
        """
        if not self.started and start:
            self.start()

        queue_raw_jobs(queue, params_list)

        if not block:
            return

        # Wait for the queue to be empty. This can be error-prone: a task may
        # be held in memory between leaving the queue and being marked as
        # started, so neither check sees it.
        pending = Queue(queue)
        while True:
            drained = pending.size() <= 0 and self.mongodb_jobs.mrq_jobs.find(
                {"status": "started"}).count() <= 0
            if drained:
                break
            time.sleep(0.1)
Ejemplo n.º 4
0
    def send_raw_tasks(self, queue, params_list, start=True, block=True):
        """Push raw jobs to ``queue``; optionally block until work is done.

        Args:
            queue: name of the raw queue passed to ``queue_raw_jobs``.
            params_list: raw job parameters passed to ``queue_raw_jobs``.
            start: when true and ``self.started`` is false, call
                ``self.start()`` first.
            block: when true, sleep in 0.1s steps while the queue is
                non-empty or any job document has status "started".
        """
        if not self.started and start:
            self.start()

        queue_raw_jobs(queue, params_list)

        if not block:
            return

        raw_queue = Queue(queue)

        def work_remaining():
            # The MongoDB query is only issued once the queue reports empty.
            if raw_queue.size() > 0:
                return True
            started_jobs = self.mongodb_jobs.mrq_jobs.find(
                {"status": "started"})
            return started_jobs.count() > 0

        # Caveat: a task that has left the queue but is not yet marked as
        # started is invisible to both checks, so this wait can end early.
        while work_remaining():
            time.sleep(0.1)
Ejemplo n.º 5
0
    def run(self, params):
        """Fetch proxies with the GQL file ``params['f']`` and schedule checks.

        Loads the query text from the file, runs it through
        ``run_gdom_page`` (using a random member of the ``CONF_DATA_OK_KEY``
        Redis set as proxy source) and enqueues three consecutive ticks per
        fetched proxy on the ``check_proxy_timed_set`` queue.

        ``abort_current_job()`` is called when ``_check_t(params)`` is truthy,
        when the file content is empty, or when no proxy was fetched.

        Returns:
            dict: ``{'file': ..., 'num': ..., 'proxy_list': ...}``.
        """
        gql_path = params.get('f', '').strip()
        if _check_t(params):
            abort_current_job()

        tick_count = 3
        interval = CONF_CHECK_INTERVAL

        query = pyfile.load_str(gql_path).strip()
        if not query:
            abort_current_job()

        def pick_proxy():
            return connections.redis.srandmember(CONF_DATA_OK_KEY)

        proxies, result = run_gdom_page(query, get_proxy=pick_proxy)

        if proxies:
            log.info('FETCH OK filename:%s, num:%d' %
                     (gql_path, len(proxies)))
        if result.errors:
            log.error('FETCH ERROR filename:%s, errors:%s' %
                      (gql_path, result.errors))

        if not proxies:
            log.error('FETCH EMPTY filename:%s, gret:%r' % (gql_path, result))
            abort_current_job()

        now = int(time.time())
        scheduled = {}
        for entry in proxies:
            # Entries are expected to look like "host:port".
            pieces = entry.split(':', 1)
            host = pieces[0]
            port = int(pieces[1])
            for step in range(tick_count):
                # CRC-based offset spreads proxies across the interval.
                tick = now + pyutils.crc32_mod(entry, interval) \
                    + step * interval
                raw_key = '%s#%d#%d#%d' % (host, port, interval,
                                           int(tick / interval))
                # First tick wins for an identical raw param.
                if raw_key not in scheduled:
                    scheduled[raw_key] = tick

        queue_raw_jobs('check_proxy_timed_set', scheduled)
        return {
            'file': gql_path,
            'num': len(proxies),
            'proxy_list': proxies
        }
Ejemplo n.º 6
0
    def run(self, params):
        """Schedule fetch jobs for every ``.gql`` file in a directory.

        Recognised ``params`` keys:
            p:  directory to list (default ``''``).
            ts: base tick interval (default 60); it is multiplied by the
                number of 100-file batches found in the directory.
            tn: number of consecutive ticks scheduled per file (default 1).

        Jobs are enqueued on the ``fetch_proxy_timed_set`` queue.

        Returns:
            dict: ``{'num': <number of .gql files found>}``.
        """
        directory = params.get('p', '').strip()
        base_interval = params.get('ts', 60)
        tick_count = params.get('tn', 1)

        def is_gql(name):
            return name.endswith('.gql')

        file_map = pyfile.get_dict_of_dir(directory, filter_func=is_gql)
        batches = math.ceil(len(file_map) / 100)
        now = int(time.time())
        scheduled = {}

        # Stretch the interval with the number of 100-file batches.
        interval = base_interval * batches
        for filename, _unused in file_map.items():
            for step in range(tick_count):
                # CRC-based offset spreads files across the interval.
                tick = now + pyutils.crc32_mod(filename, interval) \
                    + step * interval
                raw_key = '%s#%d#%d' % (filename, interval,
                                        int(tick / interval))
                # First tick wins for an identical raw param.
                if raw_key not in scheduled:
                    scheduled[raw_key] = tick

        queue_raw_jobs('fetch_proxy_timed_set', scheduled)
        return {'num': len(file_map)}
Ejemplo n.º 7
0
#!/usr/bin/env python
import sys
import time
from mrq.context import setup_context
from mrq.job import queue_raw_jobs

# Usage: <script> QUEUE N T
#   QUEUE  name of the raw queue to fill
#   N      number of tasks to enqueue
#   T      spacing between consecutive task timestamps
setup_context()

queue = sys.argv[1]
n = int(sys.argv[2])
t = int(sys.argv[3])

# Compare with ==. `queue in ("example_timed_set")` would be a substring test,
# because parentheses without a trailing comma do not create a tuple; names
# such as "", "set" or "example" would then match as well.
if queue == "example_timed_set":
    now = time.time()
    # Task i gets the timestamp now + (i + 1) * t, i.e. one task every t
    # units after the current time.
    queue_raw_jobs(
        queue,
        {"task_%s" % i: now + (i + 1) * t for i in range(n)},
    )
Ejemplo n.º 8
0
#!/usr/bin/env python
import sys
from mrq.context import setup_context
from mrq.job import queue_jobs, queue_raw_jobs

# Usage: <script> QUEUE N
#   QUEUE  one of "square", "square_raw", "square_nostorage_raw"
#   N      number of jobs to enqueue
setup_context()

queue = sys.argv[1]
n = int(sys.argv[2])

if queue == "square":
    # Regular jobs: one params dict per job, routed to the "square" queue.
    job_params = [{"n": 42} for _ in range(n)]
    queue_jobs("tasks.Square", job_params, queue=queue)
elif queue in ("square_raw", "square_nostorage_raw"):
    # Raw jobs: the raw parameter itself (42) is what gets queued, n times.
    queue_raw_jobs(queue, [42] * n)