# this should have been done in the 1.9 -> 1.10 migration script, but alas...
#
# Migration helper: for every imageboard datasource whose posts table exists
# in the 'public' schema, make sure that table has an 'image_url' column and
# add it when it is missing. Datasources without a posts table are skipped.
from backend.lib.database import Database
from backend.lib.logger import Logger

import psycopg2
import config

log = Logger(output=True)
db = Database(
    logger=log,
    dbname=config.DB_NAME,
    user=config.DB_USER,
    password=config.DB_PASSWORD,
    host=config.DB_HOST,
    port=config.DB_PORT,
    appname="4cat-migrate",
)

for datasource in ("4chan", "8kun", "8chan"):
    posts_table = "posts_%s" % datasource

    print(" Checking for %s database tables... " % datasource, end="")
    test = db.fetchone(
        "SELECT EXISTS ( SELECT FROM information_schema.tables WHERE table_schema = %s AND table_name = %s )",
        ("public", posts_table))
    if not test["exists"]:
        print("not available, nothing to upgrade!")
        continue
    # Finish the status line started above (it was printed with end=""), so
    # the next message does not run onto it.
    print("found!")

    print(" Checking if required columns exist... ", end="")
    # Restrict the lookup to the same schema as the existence check, so a
    # same-named table in another schema cannot mask a missing column.
    columns = [
        row["column_name"]
        for row in db.fetchall(
            "SELECT column_name FROM information_schema.columns WHERE table_schema = %s AND table_name = %s",
            ("public", posts_table))
    ]

    if "image_url" in columns:
        print("yes!")
    else:
        print(" adding 'image_url' column to %s posts table" % datasource)
        # SQL has no NONE literal: PostgreSQL would read it as a column
        # reference and reject the statement. NULL is the intended default.
        # The table name comes from the fixed tuple above, not from user
        # input, so interpolating it into the statement is safe here.
        db.execute("ALTER TABLE posts_%s ADD COLUMN image_url TEXT DEFAULT NULL" % datasource)
from backend.lib.logger import Logger

import psycopg2
import config

# Migration helper: gives the 4chan posts table a 'board' column, fills it
# from the thread each post belongs to, and then creates a unique index on
# (id, board). Exits early with status 0 if the posts table cannot be queried.
log = Logger(output=True)
db = Database(
    logger=log,
    dbname=config.DB_NAME,
    user=config.DB_USER,
    password=config.DB_PASSWORD,
    host=config.DB_HOST,
    port=config.DB_PORT,
    appname="4cat-migrate",
)

print(" Checking for 4chan database tables... ", end="")
try:
    # Only success or failure of the query matters; the row is not used.
    db.fetchone("SELECT * FROM posts_4chan LIMIT 1")
except psycopg2.ProgrammingError:
    # A ProgrammingError here is treated as "the table does not exist".
    print("not available, nothing to upgrade!")
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the 'site' module for interactive use and is not guaranteed
    # to be available to scripts (e.g. under `python -S`).
    raise SystemExit(0)

# NOTE(review): the statements below carry no IF NOT EXISTS guards, so this
# script is presumably meant to be run once per database - confirm.
print("\n Adding 'board' column to 4chan posts table")
db.execute("ALTER TABLE posts_4chan ADD COLUMN board TEXT DEFAULT ''")

print(" Filling 'board' column")
# Each post takes the board of the thread row whose id matches its thread_id.
db.execute(
    "UPDATE posts_4chan SET board = ( SELECT board FROM threads_4chan WHERE id = posts_4chan.thread_id )"
)

print(" Creating index")
db.execute("CREATE UNIQUE INDEX posts_4chan_id ON posts_4chan ( id, board )")
# update threads print("Updating threads.") for thread_id in threads: thread = threads[thread_id] thread["id"] = thread_id thread["board"] = args.board thread["timestamp_scraped"] = -1 thread["num_unique_ips"] = -1 thread["num_replies"] = 0 thread["num_images"] = 0 thread["is_sticky"] = True if thread["is_sticky"] == 1 else False thread["is_closed"] = True if thread["is_closed"] == 1 else False exists = db.fetchone( "SELECT * FROM threads_" + args.datasource + " WHERE id = %s", (thread_id, )) if not exists: db.insert("threads_" + args.datasource, thread) else: if thread["timestamp"] < exists["timestamp"]: thread["is_sticky"] = exists["is_sticky"] thread["is_closed"] = exists["is_closed"] thread["post_last"] = max(int(thread.get("post_last") or 0), int(exists.get("post_last") or 0)) thread["timestamp_modified"] = max( int(thread.get("timestamp_modified") or 0), int(exists.get("timestamp_modified") or 0)) thread["timestamp_modified"] = max(
try: with jsonfile.open() as input: posts = json.load(input)["posts"] except json.JSONDecodeError: print("ERROR PARSING FILE - SKIPPING: %s" % jsonfile) continue if not posts: print("Empy thread %s, skipping." % jsonfile) continue op = posts[0] last_post = max([post["no"] for post in posts if "no" in post]) thread_id = op["no"] thread_exists = db.fetchone( "SELECT id FROM threads_4chan WHERE id = %s AND board = %s", (thread_id, args.board)) if not thread_exists: bumplimit = ("bumplimit" in op and op["bumplimit"] == 1) or ("bumplocked" in op and op["bumplocked"] == 1) db.insert( "threads_4chan", { "id": thread_id, "board": args.board, "timestamp": op["time"], "timestamp_scraped": int(time.time()), "timestamp_modified": op["time"], "num_unique_ips": op["unique_ips"] if "unique_ips" in op else -1,
import psycopg2 import config log = Logger(output=True) db = Database(logger=log, dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD, host=config.DB_HOST, port=config.DB_PORT, appname="4cat-migrate") for datasource in ("8kun", "8chan"): print(" Checking for %s database tables... " % datasource, end="") try: test = db.fetchone("SELECT * FROM posts_%s LIMIT 1" % datasource) print("found!") except psycopg2.ProgrammingError: print("not available, nothing to upgrade!") continue print(" Checking if required columns exist... ", end="") columns = [ row["column_name"] for row in db.fetchall( "SELECT column_name FROM information_schema.columns WHERE table_name = %s", ("posts_%s" % datasource, )) ] if "board" in columns: print("yes!") else: print(" adding 'board' column to %s posts table" % datasource)