def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # op.add_column('posts', sa.Column('body', sa.Unicode(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(sa.or_(
        Post.data.has_key("body"),
    )).yield_per(512):
        new_data = post.data.copy()
        update = {"id": post.id, "data": new_data}
        update["body"] = new_data.pop("body", post.body)
        updates.append(update)

        # Commit every 1024 posts.
        added += 1
        if len(updates) == 1024:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        'blogs',
        sa.Column('total_likes', sa.Integer(), server_default='0',
                  nullable=False))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for blog in db.query(Blog).filter(
            sa.or_(Blog.data.has_key("likes"),
                   Blog.data.has_key("share_likes"))).yield_per(512):
        try:
            new_data = blog.data.copy()
            new_data.pop("share_likes", None)
            total_likes = max(new_data.pop("likes", 0), blog.total_likes)
            updates.append({
                "id": blog.id,
                "total_likes": total_likes,
                "data": new_data
            })
        except KeyError:
            continue

        # Commit every 512 blogs.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} blogs processed.", flush=True)
            commit_db.bulk_update_mappings(Blog, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Blog, updates)
        commit_db.commit()
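# A hedged sketch of the matching downgrade for the migration above. This is
# an assumption, not code from the source: it only drops the new column, since
# the "likes"/"share_likes" keys popped out of `data` cannot be reconstructed
# exactly once merged into `total_likes`.
def downgrade():
    op.drop_column('blogs', 'total_likes')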
def __init__(self):
    self.tumblr = create_tumblr()
    self.db = sm()
    self.grabbed = collections.defaultdict(lambda: 0)
    self.bad = collections.defaultdict(lambda: 0)
    self.queue = []
    self.running = True
def upgrade():
    db = sm()
    for post in db.query(Post):
        blog = get_blog(db, post.url)
        post.author = blog.id
        sys.stdout.write(".")
        sys.stdout.flush()  # so the progress dots appear immediately
    sys.stdout.write("\nCommitting.\n")
    db.commit()
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('blogs', sa.Column('updated', sa.DateTime(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for blog in db.query(Blog).filter(sa.or_(
        Blog.data.has_key("updated")
    )).yield_per(512):
        try:
            new_data = blog.data.copy()
            updated_epoch = new_data.pop("updated", 0)
            updated_date = max(
                datetime.datetime.fromtimestamp(updated_epoch),
                getattr(blog, "updated", None)
                or datetime.datetime.fromtimestamp(updated_epoch)
            )
            updates.append({
                "id": blog.id,
                "updated": updated_date,
                "data": new_data
            })
        except KeyError:
            continue

        # Commit every 512 blogs.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} blogs processed.", flush=True)
            commit_db.bulk_update_mappings(Blog, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Blog, updates)
        commit_db.commit()
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('posts', sa.Column('tumblr_id', sa.BigInteger(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(sa.or_(
        Post.data.has_key("id"),
    )).yield_per(512):
        try:
            new_data = post.data.copy()
            tumblr_id = new_data.pop("id")
            updates.append({
                "id": post.id,
                "tumblr_id": tumblr_id,
                "data": new_data
            })
        except KeyError:
            continue

        # Commit every 512 posts.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
def worker():
    sql = sm()
    while running and len(urls) > 0:
        url = urls.pop(random.randrange(len(urls)))
        try:
            process_url(sql, url)
        except Exception:  # a bare except here would also swallow KeyboardInterrupt
            if sentry_sdk:
                sentry_sdk.capture_exception()
            traceback.print_exc()
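# A minimal sketch of how worker() might be driven, assuming the shared
# module-level `urls` list and `running` flag seen above. run_workers() and
# its thread count are hypothetical, not from the source. If workers truly
# run concurrently, a queue.Queue would be a safer hand-off than popping
# random indices from a shared list.
import threading

def run_workers(thread_count=4):
    threads = [threading.Thread(target=worker) for _ in range(thread_count)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()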
def connect_sql():
    if request.endpoint == "static":
        return
    g.sql = sm()
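# Companion teardown handler — a sketch, assuming connect_sql() is wired up
# as a Flask before_request hook (it reads request.endpoint and writes to g).
# close_sql() and its registration below are assumptions, not from the source.
def close_sql(exception=None):
    sql = g.pop("sql", None)
    if sql is not None:
        sql.close()

# Hypothetical registration:
# app.before_request(connect_sql)
# app.teardown_request(close_sql)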
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'posts_meta',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('can_like', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('can_reblog', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('can_reply', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('display_avatar', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('is_blocks_post_format', sa.Boolean(), server_default='f', nullable=True),
        sa.Column('can_send_in_message', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('post_url', sa.String(), nullable=True),
        sa.Column('short_url', sa.String(), nullable=True),
        sa.ForeignKeyConstraint(['id'], ['posts.id']),
        sa.PrimaryKeyConstraint('id'))
    # op.add_column('posts', sa.Column('slug', sa.Unicode(), nullable=True))
    # op.add_column('posts', sa.Column('state', sa.String(), nullable=True))
    # op.add_column('posts', sa.Column('summary', sa.Unicode(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(
            sa.or_(Post.data.has_key("slug"),
                   Post.data.has_key("state"),
                   Post.data.has_key("summary"),
                   Post.data.has_key("liked"))).yield_per(512):
        new_data = post.data.copy()
        update = {"id": post.id, "data": new_data}
        update["slug"] = new_data.pop("slug", post.slug)
        update["state"] = new_data.pop("state", post.state)
        update["summary"] = new_data.pop("summary", post.summary)
        PostMeta.create_from_metadata(commit_db, new_data, post.id)
        updates.append(update)

        # Commit every 512 posts.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
def db(self):
    return sm()
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # op.add_column('posts', sa.Column('format', sa.String(), nullable=True))
    # op.add_column('posts', sa.Column('note_count', sa.Integer(), nullable=True))
    # op.add_column('posts', sa.Column('posted', sa.DateTime(), nullable=True))
    # op.add_column('posts', sa.Column('reblog_key', sa.String(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(sa.or_(
        Post.data.has_key("format"),
        Post.data.has_key("note_count"),
        Post.data.has_key("timestamp"),
        Post.data.has_key("reblog_key"),
    )).yield_per(512):
        try:
            new_data = post.data.copy()

            # Post time
            new_data.pop("date", None)
            post_epoch = new_data.pop("timestamp", 0)
            post_time = max(
                datetime.datetime.fromtimestamp(post_epoch),
                post.posted or datetime.datetime.fromtimestamp(post_epoch)
            )

            # Other fields
            post_format = new_data.pop("format", post.format)
            post_note_count = max(
                post.note_count or 0,
                new_data.pop("note_count", 0)
            )
            post_reblog_key = new_data.pop("reblog_key", post.reblog_key)

            updates.append({
                "id": post.id,
                "posted": post_time,
                "format": post_format,
                "note_count": post_note_count,
                "reblog_key": post_reblog_key,
                "data": new_data
            })
        except KeyError:
            continue

        # Commit every 512 posts.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
import sys
import time
import json
from urllib.parse import urlparse

from sqlalchemy.dialects.postgresql import insert

from archives.lib.connections import create_tumblr
from archives.lib.model import Post, Blog, sm

sql = sm()
tungle = create_tumblr()

META_POP = ["status", "msg", ""]

for post in sql.query(Post).distinct(Post.url):
    # Terrible but there's not many blogs in the DB so it can't be that bad.
    if sql.query(Blog).filter(Blog.name == post.url).scalar():
        continue

    time.sleep(0.25)
    info = tungle.blog_info(post.url)

    # Ignore 404s
    if "meta" in info:
        if info["meta"]["status"] == 404:
            print(f"{post.url} 404")
            continue  # wot how
import code

from redis import StrictRedis

from archives.lib.model import sm, Blog, Post
from archives.lib.connections import create_tumblr, redis_pool
from archives.tasks.tumblr import add_post, archive_post, archive_blog

db = sm()
redis = StrictRedis(connection_pool=redis_pool)
tumblr = create_tumblr()

if __name__ == "__main__":
    code.interact(local=dict(globals(), **locals()))
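# For reference: every snippet above calls sm() with no arguments to get a
# session, so sm is presumably a SQLAlchemy sessionmaker exported by
# archives.lib.model. A plausible definition — an assumption, since the real
# DSN and engine setup are not shown in the source — would be:
#
#     from sqlalchemy import create_engine
#     from sqlalchemy.orm import sessionmaker
#
#     engine = create_engine("postgresql:///archives")  # hypothetical DSN
#     sm = sessionmaker(bind=engine)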