def __init__(self, logger, job, db=None, queue=None, manager=None, modules=None):
    """
    Basic init, just make sure our thread name is meaningful

    :param Logger logger: Logger instance
    :param Job job: Job this worker is being run for
    :param Database db: Database connection - if not given, a new one will be created
    :param JobQueue queue: Job Queue - if not given, a new one will be instantiated
    :param WorkerManager manager: Worker manager reference
    :param modules: Contents of all_modules, passed in as an argument (see below)
    """
    super().__init__()
    self.name = self.type
    self.log = logger
    self.manager = manager
    self.job = job
    self.init_time = int(time.time())

    # all_modules cannot be easily imported into a worker because all_modules itself
    # imports all workers, so you get a recursive import that Python (rightly) blocks;
    # for workers, all_modules' content is therefore passed as a constructor argument
    self.all_modules = modules

    self.db = Database(logger=self.log, appname=self.type) if not db else db
    self.queue = JobQueue(logger=self.log, database=self.db) if not queue else queue
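# Minimal standalone sketch of the "reuse if given, otherwise create" pattern used by the
# constructor above; the class and resource names below are stand-ins, not 4CAT classes.
class _StubResource:
    def __init__(self, label):
        self.label = label


class _SketchWorker:
    def __init__(self, db=None, queue=None):
        # reuse shared resources when the caller provides them, build our own otherwise
        self.db = _StubResource("own database connection") if not db else db
        self.queue = _StubResource("own job queue") if not queue else queue


if __name__ == "__main__":
    shared_db = _StubResource("shared database connection")
    worker = _SketchWorker(db=shared_db)
    print(worker.db.label)     # -> shared database connection
    print(worker.queue.label)  # -> own job queue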
""" import argparse import psycopg2 import time import sys import re import os sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)) + "/..") from backend.lib.database import Database from backend.lib.logger import Logger from webtool.lib.user import User log = Logger() db = Database(logger=log, appname="create-user") cli = argparse.ArgumentParser() cli.add_argument("-u", "--username", required=True, help="Name of user (must be unique)") args = cli.parse_args() if __name__ != "__main__": sys.exit(1) if not re.match(r"[^@]+\@.*?\.[a-zA-Z]+", args.username): print("Please provide an e-mail address as username.") sys.exit(1) try: db.insert("users", data={"name": args.username, "timestamp_token": int(time.time())})
                 default=True)
args = cli.parse_args()

args.truncate = bool(args.truncate)
limit = int(args.limit)
sourcefile = Path(args.input)

if not sourcefile.exists():
    print("The file %s does not exist" % sourcefile)
    exit(1)

dbconn = sqlite3.connect(args.input)
dbconn.row_factory = sqlite3.Row
cursor = dbconn.cursor()

db = Database(logger=Logger())
db.execute(open("database.sql").read())

if args.truncate:
    db.execute("TRUNCATE posts_usenet")
    db.execute("TRUNCATE threads_usenet")
    db.execute("TRUNCATE groups_usenet")
    db.commit()

post_to_threads = {}
posts = cursor.execute("SELECT * FROM postsdata")

print("Loading posts....")
done = 0
while posts:
    post = posts.fetchone()
    if not post or (limit and done > limit):
        # stop when the table is exhausted or the post limit has been reached
        break
required=True, help="Name of SQLite table containing threads.") cli.add_argument("-p", "--posts_table", type=str, required=True, help="Name of the SQLite table containing posts.") cli.add_argument("-b", "--board", type=str, required=True, help="Board name") args = cli.parse_args() if not Path(args.input).exists() or not Path(args.input).is_file(): print("%s is not a valid folder name." % args.input) sys.exit(1) logger = Logger() db = Database(logger=logger, appname="queue-dump") seen_post_ids = set() # Columns from 4archive dumps posts_columns = ["id", "chan_id", "threads_id", "chan_image_name", "image_size", \ "image_dimensions", "thumb_dimensions", "image_url", "original_image_name", "subject", \ "name", "chan_user_id", "tripcode", "capcode", "chan_post_date", "body", "available"] threads_columns = ["id", "thread_id", "board", "archive_date", "update_date", "user_ips", \ "times_updated", "views", "admin_note", "secret", "available", "alive", "takedown_reason", \ "busy", "tweeted"] conn = sqlite3.connect(args.input) print("Connected to SQLite database.") count = 0
def run(as_daemon=True):
    if not as_daemon:
        # centre the welcome banner in the terminal; the box is 67 characters wide
        indent_spaces = round(shutil.get_terminal_size().columns / 2) - 33
        indent = "".join([" " for i in range(0, indent_spaces)]) if indent_spaces > 0 else ""

        print("\n\n")
        print(indent + "+" + "-" * 65 + "+")
        print(indent + "|" + " " * 65 + "|")
        print(indent + "|" + "welcome to".center(65) + "|")
        print(indent + "|" + " " * 65 + "|")
        print(indent + "|" + "   j88D   .o88b.  .d8b.  d888888b".center(65) + "|")
        print(indent + "|" + "  j8~88  d8P  Y8 d8' `8b `~~88~~'".center(65) + "|")
        print(indent + "|" + " j8' 88  8P      88ooo88    88   ".center(65) + "|")
        print(indent + "|" + "V88888D  8b      88~~~88    88   ".center(65) + "|")
        print(indent + "|" + "    88   Y8b  d8 88   88    88   ".center(65) + "|")
        print(indent + "|" + "    VP    `Y88P' YP   YP    YP   ".center(65) + "|")
        print(indent + "|" + " " * 65 + "|")
        print(indent + "|" + "4CAT: Capture and Analysis Toolkit".center(65) + "|")
        print(indent + "|" + " " * 65 + "|")
        print(indent + "|" + " " * 65 + "|")
        print(indent + "+" + "-" * 65 + "+")
        print(indent + "|" + "press q + enter to shut down".center(65) + "|")
        print(indent + "|" + " " * 65 + "|")
        print(indent + "|" + "WARNING: Not running as a daemon. Quitting this process will".center(65) + "|")
        print(indent + "|" + "shut down the backend as well.".center(65) + "|")
        print(indent + "+" + "-" * 65 + "+" + "\n\n")

    # load everything
    log = Logger(output=not as_daemon)
    db = Database(logger=log, appname="main")
    queue = JobQueue(logger=log, database=db)

    # clean up after ourselves
    db.commit()
    queue.release_all()

    # make it happen
    WorkerManager(logger=log, database=db, queue=queue, as_daemon=as_daemon)

    log.info("4CAT Backend shut down.")
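# Standalone sketch, separate from the code above: the indent arithmetic centres a box of
# roughly 67 characters, hence half the terminal width minus 33 (about half the box width).
import shutil

if __name__ == "__main__":
    columns = shutil.get_terminal_size().columns
    indent_spaces = round(columns / 2) - 33
    indent = " " * max(indent_spaces, 0)
    print(indent + "+--- roughly centred ---+")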
required=True, help="Datasource ID") cli.add_argument("-b", "--board", type=str, required=True, help="Board name") args = cli.parse_args() if not Path(args.input).exists() or not Path(args.input).is_dir(): print("%s is not a valid folder name." % args.input) sys.exit(1) input = Path(args.input).resolve() jsons = input.glob("*.json") print("Initialising queue...") logger = Logger() queue = JobQueue(logger=logger, database=Database(logger=logger, appname="queue-folder")) print("Adding files to queue...") files = 0 deadline = time.time() for file in jsons: files += 1 file = str(file) queue.add_job(args.datasource + "-thread", remote_id=file, details={ "board": args.board, "file": str(file) }, claim_after=int(deadline)) deadline += 0.1
# this should have been done in the 1.9 -> 1.10 migration script, but alas...
from backend.lib.database import Database
from backend.lib.logger import Logger

import psycopg2
import config

log = Logger(output=True)
db = Database(logger=log, dbname=config.DB_NAME, user=config.DB_USER, password=config.DB_PASSWORD,
              host=config.DB_HOST, port=config.DB_PORT, appname="4cat-migrate")

for datasource in ("4chan", "8kun", "8chan"):
    print("  Checking for %s database tables... " % datasource, end="")

    test = db.fetchone(
        "SELECT EXISTS ( SELECT FROM information_schema.tables WHERE table_schema = %s AND table_name = %s )",
        ("public", "posts_%s" % datasource))

    if not test["exists"]:
        print("not available, nothing to upgrade!")
        continue

    print("  Checking if required columns exist... ", end="")
    columns = [row["column_name"] for row in db.fetchall(
        "SELECT column_name FROM information_schema.columns WHERE table_name = %s",
        ("posts_%s" % datasource,))]

    if "image_url" in columns:
        print("yes!")
    else:
        print("  adding 'image_url' column to %s posts table" % datasource)
        # NULL (not NONE) is the valid SQL default for a nullable column
        db.execute("ALTER TABLE posts_%s ADD COLUMN image_url TEXT DEFAULT NULL" % datasource)
from pathlib import Path

cli = argparse.ArgumentParser()
cli.add_argument("-i", "--input", required=True, help="csv to import")
args = cli.parse_args()

input = Path(args.input)
if not input.exists():
    print("File not found")
    sys.exit(1)

with open(input) as i:
    reader = csv.DictReader(i)

    # make sure the csv has all required columns before counting its rows
    required = ("id", "thread_id", "subject", "author", "timestamp", "body")
    for field in required:
        if field not in reader.fieldnames:
            print("Column '%s' missing." % field)
            sys.exit(1)

    rows = 0
    for row in reader:
        rows += 1

logger = Logger()
new_set = DataSet(
    parameters={"user": "******", "filename": input.name, "time": int(time.time()), "datasource": "custom",
                "board": "upload"},
    type="custom",
    db=Database(logger=logger))

shutil.copyfile(input, new_set.get_results_path())
new_set.finish(rows)
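# Standalone sketch, separate from the script above: csv.DictReader exposes the header row
# as .fieldnames, so required columns can be checked before any data rows are read.
import csv
import io

if __name__ == "__main__":
    required = ("id", "thread_id", "subject", "author", "timestamp", "body")
    sample = io.StringIO("id,thread_id,subject,author,timestamp,body\n1,1,hi,anon,0,hello world\n")
    reader = csv.DictReader(sample)
    missing = [field for field in required if field not in reader.fieldnames]
    print(missing or "all required columns present")  # -> all required columns present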
description="Deletes a query, the corresponding job, and any sub-queries.") cli.add_argument("-k", "--key", required=True, help="Query key to delete.") cli.add_argument( "-q", "--quiet", type=bool, default=False, help="Whether to skip asking for confirmation. Defaults to false.") args = cli.parse_args() if not args.quiet: confirm = input( "This will delete the query, and any sub-queries. Are you sure? (y/n)") if confirm.strip().lower() != "y": sys.exit(0) logger = Logger() database = Database(logger=logger, appname="delete-query") # Initialize query try: parent = DataSet(key=args.key, db=database) except TypeError: print("No query found with that key.") sys.exit(1) parent.delete() print( "Done. Note that running jobs for the queries above are not stopped; you will have to wait for them to finish on their own." )
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address

import config

from backend.lib.database import Database
from backend.lib.logger import Logger
from backend.lib.queue import JobQueue

# use the test database when the Flask config is set to "Test" debugging mode
database_name = config.DB_NAME_TEST if hasattr(config.FlaskConfig, "DEBUG") and config.FlaskConfig.DEBUG == "Test" else config.DB_NAME

login_manager = LoginManager()
app = Flask(__name__)

log = Logger()
db = Database(logger=log, dbname=database_name, appname="frontend")
queue = JobQueue(logger=log, database=db)

# initialize openapi endpoint collector for later specification generation
from webtool.lib.openapi_collector import OpenAPICollector
openapi = OpenAPICollector(app)

# initialize rate limiter
limiter = Limiter(app, key_func=get_remote_address)

# make sure a secret key was set in the config file, for secure session cookies
if config.FlaskConfig.SECRET_KEY == "REPLACE_THIS":
    raise Exception("You need to set FlaskConfig.SECRET_KEY in config.py before running the web tool.")
cli.add_argument("-f", "--fast", default=False, type=bool,
                 help="Use batch queries instead of inserting posts individually. This is far faster than 'slow' mode, "
                      "but will crash if trying to insert a duplicate post, so it should only be used on an empty "
                      "database or when you're sure datasets don't overlap.")
args = cli.parse_args()

if not os.path.exists(args.input):
    print("File not found: %s" % args.input)
    sys.exit(1)

db = Database(logger=Logger(), appname="4chan-import")

print("Opening %s." % args.input)
if args.skip > 0:
    print("Skipping %i posts." % args.skip)

if args.fast:
    print("Fast mode enabled.")

with open(args.input, encoding="utf-8") as inputfile:
    postscsv = csv.DictReader(inputfile, fieldnames=FourPlebs.columns, dialect=FourPlebs)

    postbuffer = []
    threads = {}
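# Standalone sketch, separate from the importer above, of the trade-off behind "fast" mode,
# using sqlite3 so it runs anywhere: a single executemany() batch is much faster than
# row-by-row inserts, but one duplicate key aborts the whole call.
import sqlite3

if __name__ == "__main__":
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE posts (id INTEGER PRIMARY KEY, body TEXT)")

    postbuffer = [(1, "first"), (2, "second"), (3, "third")]
    conn.executemany("INSERT INTO posts (id, body) VALUES (?, ?)", postbuffer)
    conn.commit()
    print(conn.execute("SELECT COUNT(*) FROM posts").fetchone()[0])  # -> 3

    try:
        conn.executemany("INSERT INTO posts (id, body) VALUES (?, ?)", [(4, "ok"), (3, "dupe")])
    except sqlite3.IntegrityError:
        conn.rollback()
        print("duplicate id: the executemany() call was aborted")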