コード例 #1
0
    def __init__(self):
        """Create the Tumblr and Redis clients and reset the instance state."""
        # External clients, built through the project factories.
        self.tumblr = create_tumblr()
        self.redis = create_redis()

        # Mapping whose missing keys read as 0.
        self.bad = collections.defaultdict(int)

        # Wall-clock time (seconds since the epoch) at construction.
        self.last_request = time.time()
        self.running, self._fetch_item = True, None
コード例 #2
0
def worker_repusher():
    """Move stale entries from the working set back onto the import queue.

    While the module-level ``running`` flag is truthy, scan the Redis set
    ``tumblr:queue:import:working`` and then sleep for 5 seconds. Each member
    is expected to look like ``"<start timestamp>;<work>"``. When the
    timestamp is more than 180 seconds old, the member is removed from the
    working set and ``<work>`` is added to ``tumblr:queue:import``.

    Members that do not match that shape (no ``;`` separator, or a timestamp
    that is not a number) are reported and skipped, so one bad entry cannot
    terminate the loop.
    """
    global running
    redis = create_redis()

    while running:
        for raw_work in redis.smembers("tumblr:queue:import:working"):
            try:
                started, work = raw_work.split(";", 1)
                started_delta = time.time() - float(started)
            except ValueError:
                # Missing ";" or non-numeric timestamp: leave the entry alone
                # and keep scanning instead of letting the loop die.
                print("Skipping malformed working entry: %r" % (raw_work,))
                continue

            if started_delta <= 180:
                continue

            # SREM returns the number of members it removed. Zero means the
            # entry vanished after SMEMBERS ran (NOTE(review): presumably a
            # worker finished it or another repusher took it - confirm), so
            # pushing the work again would queue it twice.
            if not redis.srem("tumblr:queue:import:working", raw_work):
                continue

            print("Requing work that has been idle for %s seconds." % (started_delta))
            redis.sadd("tumblr:queue:import", work)

        time.sleep(5)
コード例 #3
0
def worker_feeder():
    """Hand blogs to ``load_blog`` whenever the import queue is not full enough.

    Loops while the module-level ``running`` flag is truthy. Each pass reads
    the sizes of the import, working and manual-queue sets from Redis and
    prints a status line. If more than 420 offsets are queued and the manual
    queue is empty, the pass only sleeps for one second. Otherwise every
    ``(blog, use_db)`` pair produced by ``get_blogs`` is passed to
    ``load_blog``.
    """
    global running
    db = sm()
    tumblr = create_tumblr()
    redis = create_redis()

    while running:
        queued = redis.scard("tumblr:queue:import")
        in_progress = redis.scard("tumblr:queue:import:working")
        manual = redis.scard("tumblr:queue:manualqueue")

        print(f"{queued} offsets queued. {in_progress} being worked on.", flush=True)

        needs_feeding = queued <= 420 or manual > 0
        if needs_feeding:
            for blog, use_db in get_blogs(db, manual):
                load_blog(db, redis, tumblr, blog, use_db)
        else:
            # Archiving secured: enough work is queued, check again shortly.
            time.sleep(1)
コード例 #4
0
def worker():
    """Pass queued blogs and posts to ``add_bulk`` while ``running`` is truthy.

    Each pass reads the sizes of ``tumblr:queue:posts`` and
    ``tumblr:queue:blogs``. When both are empty the pass sleeps for one
    second. Otherwise it prints the two counts and calls ``add_bulk`` for
    blogs first, then posts, skipping whichever set was empty when counted.
    """
    global running
    db = sm()
    redis = create_redis()

    while running:
        # Both sizes are sampled up front, posts first.
        queued_posts = redis.scard("tumblr:queue:posts")
        queued_blogs = redis.scard("tumblr:queue:blogs")

        if queued_posts + queued_blogs <= 0:
            time.sleep(1)
            continue

        print(f"{queued_posts} posts, {queued_blogs} blogs in queue.")

        # Blogs are parsed before posts.
        batches = (
            ("blogs", "tumblr:queue:blogs", queued_blogs),
            ("posts", "tumblr:queue:posts", queued_posts),
        )
        for kind, queue_key, queued in batches:
            if queued > 0:
                add_bulk(db, redis, kind, queue_key)
コード例 #5
0
import os
import math
import random
import threading
import time
import json

from sqlalchemy.sql.expression import func
from sqlalchemy import or_

from apipipeline.connections import create_redis, create_tumblr
from apipipeline.model import Blog, Post, sm

# Module-level Redis client, created once when this module is imported.
redis = create_redis()
# Module-level run flag, initially on.
# NOTE(review): presumably cleared elsewhere to stop the worker loops - confirm.
running = True

# Worker feeder

def load_blog(db, redis, tumblr, blog, use_db=False):
    if not use_db:
        info = tumblr.blog_info(blog.name)
    else:
        info = {
            "meta": {"status": 200},
            "blog": blog.data
        }

        # In case bad data gets saved.
        if "posts" not in blog.data or not blog.data["posts"]:
            info = tumblr.blog_info(blog.name)