new_id = db.insert("posts_4chan", post_data, commit=False, safe=False, return_field="id_seq") except UniqueViolation: print("Duplicate post with id %s in the SQLite dump, skipping." % post_data["id"]) db.rollback() post_data = {} continue # Add to the database! if count > 0 and count % 10000 == 0: print("Committing post %i - %i)" % (count - 10000, count)) db.commit() count += 1 db.commit() nthreads = 0 for thread in threads.values(): db.insert("threads_4chan", data=thread, commit=False, safe=False) if nthreads > 0 and nthreads % 10000 == 0: print("Committing threads %i - %i" % (nthreads - 10000, nthreads)) db.commit() nthreads += 1 db.commit() print("Done - added %i posts from %s to the 4CAT 4chan dataset" % (
def run(as_daemon=True):
    """
    Start the 4CAT backend.

    When not running as a daemon, a welcome banner is first printed to
    stdout, shifted right based on the current terminal width. After that
    the logger, database connection and job queue are created, the queue is
    told to release all jobs, and control is handed to the worker manager.

    :param bool as_daemon:  Whether the backend runs as a daemon. If false,
                            the banner is printed and the logger is created
                            with `output=True`.
    """
    if not as_daemon:
        # shift the banner right by half the terminal width minus 33
        # columns; on narrow terminals no indent is used at all
        half_width = round(shutil.get_terminal_size().columns / 2)
        padding = " " * max(half_width - 33, 0)

        border = "+---------------------------------------------------------------+"
        blank = "| |"
        banner = (
            border,
            blank,
            "| welcome to |",
            blank,
            "| j88D .o88b. .d8b. d888888b |",
            "| j8~88 d8P Y8 d8' `8b `~~88~~' |",
            "| j8' 88 8P 88ooo88 88 |",
            "| V88888D 8b 88~~~88 88 |",
            "| 88 Y8b d8 88 88 88 |",
            "| VP `Y88P' YP YP YP |",
            blank,
            "| 4CAT: Capture and Analysis Toolkit |",
            blank,
            blank,
            border,
            "| press q + enter to shut down |",
            blank,
            "| WARNING: Not running as a daemon. Quitting this process will |",
            "| shut down the backend as well. |",
            border + "\n\n",
        )

        print("\n\n")
        for row in banner:
            print(padding + row)

    # set up the shared services; the logger only gets `output=True` when
    # running in the foreground
    log = Logger(output=not as_daemon)
    db = Database(logger=log, appname="main")
    queue = JobQueue(logger=log, database=db)

    # start from a clean slate: commit on the fresh connection and release
    # all jobs (presumably claims left over from an earlier run - verify
    # against JobQueue.release_all)
    db.commit()
    queue.release_all()

    # hand over to the worker manager; NOTE(review): this call presumably
    # blocks until the backend is told to shut down, given the log message
    # that follows it - confirm against WorkerManager
    WorkerManager(logger=log, database=db, queue=queue, as_daemon=as_daemon)

    log.info("4CAT Backend shut down.")