def __init__(self):
    """Create the service connections and reset the instance's bookkeeping state."""
    # External clients; built first, in this order, before any state is set.
    self.tumblr = create_tumblr()
    self.redis = create_redis()

    # Per-key integer counter: a missing key reads as 0.
    # NOTE(review): what is being counted is not visible here - confirm with callers.
    self.bad = collections.defaultdict(int)

    # Wall-clock timestamp (seconds since the epoch) captured at construction.
    self.last_request = time.time()

    # Instance starts in the "running" state.
    self.running = True

    # Nothing fetched yet.
    self._fetch_item = None
def worker_repusher():
    """Move stale entries from the working set back onto the import queue.

    Each member of ``tumblr:queue:import:working`` is read as
    ``"<start timestamp>;<work>"``.  Any member whose timestamp is more than
    180 seconds in the past is removed from the working set and its ``<work>``
    part is added to ``tumblr:queue:import``.  The set is rescanned every
    five seconds for as long as the module-level ``running`` flag is truthy.
    """
    global running

    conn = create_redis()
    working_key = "tumblr:queue:import:working"
    pending_key = "tumblr:queue:import"

    while running:
        for entry in conn.smembers(working_key):
            # Split only on the first ';' so the work payload may contain ';'.
            started_at, payload = entry.split(";", 1)
            idle_for = time.time() - float(started_at)

            if idle_for > 180:
                print("Requing work that has been idle for %s seconds." % (idle_for))
                conn.srem(working_key, entry)
                conn.sadd(pending_key, payload)

        # Pause between scans of the working set.
        time.sleep(5)
def worker_feeder():
    """Feed the import queue by loading blogs while it is not saturated.

    On every pass the sizes of the import, working and manual-queue sets are
    read and a status line is printed.  When more than 420 offsets are
    already queued and the manual queue is empty, the pass just sleeps for a
    second.  Otherwise ``load_blog`` is called for every ``(blog, use_db)``
    pair produced by ``get_blogs``.  Runs until the module-level ``running``
    flag becomes falsy.
    """
    global running

    session = sm()
    client = create_tumblr()
    conn = create_redis()

    while running:
        import_count = conn.scard("tumblr:queue:import")
        working_count = conn.scard("tumblr:queue:import:working")
        manual_pending = conn.scard("tumblr:queue:manualqueue")

        print(f"{import_count} offsets queued. {working_count} being worked on.", flush=True)

        queue_saturated = import_count > 420
        nothing_manual = manual_pending <= 0

        if queue_saturated and nothing_manual:
            # Archiving secured: enough work is queued, back off briefly.
            time.sleep(1)
        else:
            for blog, use_db in get_blogs(session, manual_pending):
                load_blog(session, conn, client, blog, use_db)
def worker():
    """Drain the queued blogs and posts through ``add_bulk``.

    Polls the sizes of ``tumblr:queue:blogs`` and ``tumblr:queue:posts``.
    When both are empty the loop sleeps for a second; otherwise it prints the
    sizes and calls ``add_bulk`` for each non-empty queue, blogs before posts.
    Runs until the module-level ``running`` flag becomes falsy.
    """
    global running

    session = sm()
    conn = create_redis()

    while running:
        post_count = conn.scard("tumblr:queue:posts")
        blog_count = conn.scard("tumblr:queue:blogs")

        if post_count + blog_count <= 0:
            # Nothing queued; idle briefly before polling again.
            time.sleep(1)
            continue

        print(f"{post_count} posts, {blog_count} blogs in queue.")

        # (table name, redis key, size observed at the top of this pass);
        # order matters: blogs are parsed before posts.
        queues = (
            ("blogs", "tumblr:queue:blogs", blog_count),
            ("posts", "tumblr:queue:posts", post_count),
        )
        for table, key, queued in queues:
            if queued > 0:
                add_bulk(session, conn, table, key)
import os import math import random import threading import time import json from sqlalchemy.sql.expression import func from sqlalchemy import or_ from apipipeline.connections import create_redis, create_tumblr from apipipeline.model import Blog, Post, sm redis = create_redis() running = True # Worker feeder def load_blog(db, redis, tumblr, blog, use_db=False): if not use_db: info = tumblr.blog_info(blog.name) else: info = { "meta": {"status": 200}, "blog": blog.data } # In case bad data gets saved. if "posts" not in blog.data or not blog.data["posts"]: info = tumblr.blog_info(blog.name)