Example n. 1
0
 def runs(cls, crawler):
     """Yield a summary dict for every recorded run of *crawler*.

     Each dict carries the run id, its total operation count, and the
     unpacked start/end timestamps read from redis.
     """
     for run_id in cls.run_ids(crawler):
         # Fetch the raw per-run bookkeeping values from redis.
         raw_start = cls.conn.get(make_key("run", run_id, "start"))
         raw_end = cls.conn.get(make_key("run", run_id, "end"))
         raw_ops = cls.conn.get(make_key("run", run_id, "total_ops"))
         yield {
             'run_id': run_id,
             'total_ops': unpack_int(raw_ops),
             # NOTE(review): the second argument looks like a fallback used
             # when no start value is stored — confirm in unpack_datetime.
             'start': unpack_datetime(raw_start, datetime.utcnow()),
             'end': unpack_datetime(raw_end),
         }
Example n. 2
0
 def runs(cls, crawler):
     """Yield a summary dict for each run id stored in the crawler's run list."""
     run_ids = cls.conn.lrange(make_key(crawler, "runs_list"), 0, -1)
     for run_id in run_ids:
         # Raw redis values; unpacked just before yielding.
         raw_start = cls.conn.get(make_key("run", run_id, "start"))
         raw_end = cls.conn.get(make_key("run", run_id, "end"))
         raw_ops = cls.conn.get(make_key("run", run_id, "total_ops"))
         yield {
             'run_id': run_id,
             'total_ops': unpack_int(raw_ops),
             'start': unpack_datetime(raw_start),
             'end': unpack_datetime(raw_end),
         }
Example n. 3
0
    def tasks(cls):
        """Generate (stage, state, data, next_allowed_exec_time) tuples.

        Blocks on the per-stage redis queues and rotates the queue order
        after every pop so no single crawler can starve the others.
        """
        queues = [make_key('queue', c, s) for c, s in manager.stages]
        # Start from a random ordering so no stage is structurally favoured.
        random.shuffle(queues)
        while True:
            popped = cls.conn.blpop(queues)
            # blpop normally blocks until an item arrives, but fakeredis has
            # no blocking support and simply returns None — stop in that case.
            if not popped:
                return

            queue_key, payload = popped
            # Rotate the list so the queue we just served moves to the very
            # end, giving every other crawler priority on the next pop.
            served = queues.index(queue_key)
            queues = queues[served + 1:] + queues[:served + 1]

            task = load_json(payload)
            next_time = unpack_datetime(task.get("next_allowed_exec_time"))
            # One fewer task pending for this crawler.
            cls.conn.decr(make_key('queue_pending', task["state"].get('crawler')))
            yield (task["stage"], task["state"], task["data"], next_time)
Example n. 4
0
    def tasks(cls):
        """Generate (stage, state, data, next_allowed_exec_time) tuples.

        Pops tasks from the per-stage redis queues, rotating the queue
        order after each pop so all crawlers are served fairly.
        """
        queues = [make_key('queue', c, s) for c, s in manager.stages]
        while True:
            # In DEBUG, poll with a one-second timeout instead of blocking
            # indefinitely.
            wait = 1 if settings.DEBUG else 0
            popped = conn.blpop(queues, timeout=wait)
            # blpop normally blocks until an item arrives, but fakeredis has
            # no blocking support and simply returns None — stop in that case.
            if popped is None:
                return

            queue_key, payload = popped
            # Rotate the list so the queue we just served moves to the very
            # end, giving every other crawler priority on the next pop.
            served = queues.index(queue_key)
            queues = queues[served + 1:] + queues[:served + 1]

            task = load_json(payload)
            next_time = unpack_datetime(task.get("next_allowed_exec_time"))
            yield (task["stage"], task["state"], task["data"], next_time)
Example n. 5
0
 def event_list(cls, key, start, end):
     """Return the decoded events in the redis list at *key*.

     Reads the *start*..*end* slice of the list, JSON-decodes each entry
     and replaces its 'timestamp' field with an unpacked datetime.
     """
     raw_events = cls.conn.lrange(key, start, end)
     if raw_events is None:
         # Nothing stored under this key.
         return []
     decoded = []
     for raw in raw_events:
         event = load_json(raw)
         event["timestamp"] = unpack_datetime(event['timestamp'])
         decoded.append(event)
     return decoded
Example n. 6
0
 def last_run(cls, crawler):
     """Return the unpacked timestamp of the crawler's most recent run."""
     raw = cls.conn.get(make_key(crawler, "last_run"))
     return unpack_datetime(raw)