def restart_dataset(key):
    """
    Run a dataset's query again

    Deletes all underlying datasets, marks dataset as unfinished, and queues a
    job for it.

    :param str key:  Dataset key
    :return:
    """
    try:
        dataset = DataSet(key=key, db=db)
    except TypeError:
        return error(404, message="Dataset not found.")

    if current_user.get_id() != dataset.parameters.get("user", "") and not current_user.is_admin:
        return error(403, message="Not allowed.")

    if not dataset.is_finished():
        return render_template("error.html", message="This dataset is not finished yet - you cannot re-run it.")

    if "type" not in dataset.parameters:
        return render_template("error.html",
                               message="This is an older dataset that unfortunately lacks the information necessary to properly restart it.")

    for child in dataset.children:
        child.delete()

    dataset.unfinish()
    queue = JobQueue(logger=log, database=db)
    queue.add_job(jobtype=dataset.parameters["type"], remote_id=dataset.key)

    flash("Dataset queued for re-running.")
    return redirect("/results/" + dataset.key + "/")
def __init__(self, logger, job, db=None, queue=None, manager=None, modules=None):
    """
    Basic init, just make sure our thread name is meaningful

    :param Logger logger:  Logger instance
    :param Job job:  Job this worker was started for
    :param Database db:  Database connection - if not given, a new one will be created
    :param JobQueue queue:  Job Queue - if not given, a new one will be instantiated
    :param WorkerManager manager:  Worker manager reference
    :param modules:  Module data, passed in by the manager (see comment below)
    """
    super().__init__()
    self.name = self.type
    self.log = logger
    self.manager = manager
    self.job = job
    self.init_time = int(time.time())

    # all_modules cannot be easily imported into a worker because all_modules itself
    # imports all workers, so you get a recursive import that Python (rightly) blocks
    # so for workers, all_modules' content is passed as a constructor argument
    self.all_modules = modules

    self.db = Database(logger=self.log, appname=self.type) if not db else db
    self.queue = JobQueue(logger=self.log, database=self.db) if not queue else queue
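# A minimal sketch of a worker subclass using the constructor arguments above;
# this is an illustration, not 4CAT's actual worker API. The parent class name
# (BasicWorker), the work() method and the type string are assumptions. The
# job's remote_id is assumed to hold a dataset key, matching how
# restart_dataset() queues jobs above.
class ExampleWorker(BasicWorker):
    type = "example-datasource-search"  # should match the queued jobtype

    def work(self):
        # reconstruct the dataset this job refers to and re-run its query
        dataset = DataSet(key=self.job.data["remote_id"], db=self.db)
        self.log.info("Re-running query for dataset %s" % dataset.key)
        # ... run the search, write results, mark the dataset finished ...
        self.job.finish()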
def api_status():
    """
    Get service status

    :return: Flask JSON response
    """
    # get job stats
    queue = JobQueue(logger=log, database=db)
    jobs = queue.get_all_jobs()
    jobs_count = len(jobs)
    jobs_types = set([job.data["jobtype"] for job in jobs])
    jobs_sorted = {jobtype: len([job for job in jobs if job.data["jobtype"] == jobtype]) for jobtype in jobs_types}
    jobs_sorted["total"] = jobs_count

    # determine if backend is live by checking if the process is running
    lockfile = Path(config.PATH_ROOT, config.PATH_LOCKFILE, "4cat.pid")
    if os.path.isfile(lockfile):
        with lockfile.open() as pidfile:
            pid = pidfile.read()
            backend_live = int(pid) in psutil.pids()
    else:
        backend_live = False

    response = {
        "code": API_SUCCESS,
        "items": {
            "backend": {
                "live": backend_live,
                "queued": jobs_sorted
            },
            "frontend": {
                "live": True  # duh
            }
        }
    }

    return jsonify(response)
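# Illustrative response shape for api_status() above; the jobtype name and
# counts are made up, and "code" carries whatever the API_SUCCESS constant holds:
#
# {
#     "code": "<API_SUCCESS>",
#     "items": {
#         "backend": {"live": true, "queued": {"example-jobtype": 2, "total": 2}},
#         "frontend": {"live": true}
#     }
# }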
cli.add_argument("-d", "--datasource", type=str, required=True, help="Datasource ID")
cli.add_argument("-b", "--board", type=str, required=True, help="Board name")
args = cli.parse_args()

if not Path(args.input).exists() or not Path(args.input).is_dir():
    print("%s is not a valid folder name." % args.input)
    sys.exit(1)

input = Path(args.input).resolve()
jsons = input.glob("*.json")

print("Initialising queue...")
logger = Logger()
queue = JobQueue(logger=logger, database=Database(logger=logger, appname="queue-folder"))

print("Adding files to queue...")
files = 0
deadline = time.time()
for file in jsons:
    files += 1
    file = str(file)
    queue.add_job(args.datasource + "-thread", remote_id=file, details={
        "board": args.board,
        "file": str(file)
    }, claim_after=int(deadline))
    deadline += 0.1
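# Hypothetical invocation of the script above. The script name and the
# -i/--input flag are assumptions (they are not shown in this excerpt), as are
# the example folder, datasource and board values:
#
#   python queue_folder.py -i /path/to/json-folder -d 4chan -b pol
#
# Every *.json file in the folder is queued as a "<datasource>-thread" job;
# claim_after is derived from a deadline that grows by 0.1 seconds per file,
# so the jobs become claimable in a staggered fashion rather than all at once.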
def run(as_daemon=True):
    if not as_daemon:
        indent_spaces = round(shutil.get_terminal_size().columns / 2) - 33
        indent = "".join([" " for i in range(0, indent_spaces)]) if indent_spaces > 0 else ""
        print("\n\n")
        print(indent + "+---------------------------------------------------------------+")
        print(indent + "|                                                               |")
        print(indent + "|                          welcome to                           |")
        print(indent + "|                                                               |")
        print(indent + "|                   j88D .o88b.   .d8b.  d888888b               |")
        print(indent + "|                  j8~88 d8P  Y8 d8' `8b `~~88~~'               |")
        print(indent + "|                 j8' 88 8P      88ooo88    88                  |")
        print(indent + "|                V88888D 8b      88~~~88    88                  |")
        print(indent + "|                     88 Y8b  d8 88   88    88                  |")
        print(indent + "|                     VP `Y88P'  YP   YP    YP                  |")
        print(indent + "|                                                               |")
        print(indent + "|              4CAT: Capture and Analysis Toolkit               |")
        print(indent + "|                                                               |")
        print(indent + "|                                                               |")
        print(indent + "+---------------------------------------------------------------+")
        print(indent + "|  press q + enter to shut down                                 |")
        print(indent + "|                                                               |")
        print(indent + "|  WARNING: Not running as a daemon. Quitting this process will |")
        print(indent + "|  shut down the backend as well.                               |")
        print(indent + "+---------------------------------------------------------------+\n\n")

    # load everything
    log = Logger(output=not as_daemon)
    db = Database(logger=log, appname="main")
    queue = JobQueue(logger=log, database=db)

    # clean up after ourselves
    db.commit()
    queue.release_all()

    # make it happen
    WorkerManager(logger=log, database=db, queue=queue, as_daemon=as_daemon)
    log.info("4CAT Backend shut down.")
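# Hypothetical entry point for run() above; the real 4CAT launcher (which
# handles daemonisation and command-line flags) is not part of this excerpt.
# The "--interactive" flag below is an assumption for illustration only.
if __name__ == "__main__":
    import sys
    run(as_daemon=("--interactive" not in sys.argv))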
def process_standalone(processor):
    """
    Run a standalone processor

    This bypasses the usual 4CAT query-processor structure and allows running
    any available processor (see the `/api/get-standalone-processors/`
    endpoint) with one API call. The data is returned immediately and not
    saved server-side.

    Requires authentication.

    :param str processor:  ID of the processor to run on incoming data

    :request-body object data:  Data to process, a JSON-formatted list of
    objects with each object having at least the keys `post_id`, `thread_id`,
    `body`, and `author`.

    :request-schema data: {
        type=object,
        properties={
            post_id={type=string},
            thread_id={type=string},
            body={type=string},
            author={type=string}
        }
    }

    :request-param str ?access_token:  Access token; only required if not
    logged in currently.

    :return:  A JSON object containing the processed data, with a
    processor-specific structure.

    :return-schema: {
        type=object,
        additionalProperties={}
    }

    :return-error 402:  If an invalid processor is requested, or if the input
    is not properly-formatted JSON.
    :return-error 503:  If too many other requests are currently being handled,
    so that the server does not have the capacity to deal with this request
    """
    processors = get_standalone_processors().get_json()

    if processor not in processors:
        return error(402, error="Processor '%s' is not available" % processor)

    if not request.is_json:
        return error(402, error="This API endpoint only accepts JSON-formatted data as input")

    try:
        input = request.get_json(force=True)
    except json.JSONDecodeError:
        return error(402, error="JSON decoding error")

    # check file integrity
    required = ("id", "thread_id", "body", "author")
    try:
        for row in input:
            for field in required:
                if field not in row:
                    return error(402, error="Input is valid JSON, but not a list of data objects (missing field '%s')" % field)
    except TypeError:
        return error(402, error="Input is valid JSON, but not a list of data objects")

    if not input:
        return error(402, error="Input is empty")

    # ok, valid input!
    temp_dataset = DataSet(extension="csv", type="standalone", parameters={
        "user": current_user.get_id(),
        "after": [processor]
    }, db=db)
    temp_dataset.finish(len(input))

    # make sure the file is deleted later, whichever way this request is
    # ultimately handled
    @after_this_request
    def delete_temporary_dataset(response):
        temp_dataset.delete()  # also deletes children!
        return response

    # write the input as a csv file so it can be accessed as normal by
    # processors
    result_file = temp_dataset.get_results_path()
    with result_file.open("w") as temp_csv:
        writer = csv.DictWriter(temp_csv, fieldnames=required)
        writer.writeheader()
        for row in input:
            writer.writerow({field: row[field] for field in required})

    # queue the postprocessor
    metadata = processors[processor]
    processed = DataSet(extension=metadata["extension"], type=processor, parent=temp_dataset.key, db=db)

    queue = JobQueue(database=db, logger=log)
    job = queue.add_job(processor, {}, processed.key)
    place_in_queue = queue.get_place_in_queue(job)
    if place_in_queue > 5:
        job.finish()
        return error(code=503, error="Your request could not be handled as there are currently %i other jobs of this type in the queue. Please try again later." % place_in_queue)

    # wait up to half a minute for the job to be taken up
    # if not, tell the user to try again later
    start = time.time()
    while True:
        if time.time() > start + 30:
            job.finish()
            return error(code=503, error="The server is currently too busy to handle your request. Please try again later.")
        if queue.get_place_in_queue(job) != 0:
            time.sleep(2)
            continue
        else:
            break

    # job currently being processed, wait for it to finish
    while True:
        try:
            job = Job.get_by_remote_ID(job.data["remote_id"], db, processor)
        except JobNotFoundException:
            break

        if not job.is_finished:
            time.sleep(2)
        else:
            break

    # job finished, send file - temporary datasets will be cleaned up by
    # after_this_request function defined earlier
    return send_file(processed.get_results_path(), as_attachment=True)
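# Hypothetical client-side call to the endpoint above using the requests
# library. The host and URL path are assumptions (the route decorator is not
# part of this excerpt); the body fields follow the docstring and the
# `required` check above (which expects `id` rather than `post_id`), so both
# are included here.
import requests

posts = [
    {"id": "1", "post_id": "1", "thread_id": "10", "body": "first post", "author": "anon"},
    {"id": "2", "post_id": "2", "thread_id": "10", "body": "a reply", "author": "anon"},
]
response = requests.post(
    "https://4cat.example.com/api/processor/example-processor/run/",  # assumed route
    params={"access_token": "YOUR_TOKEN"},
    json=posts,
)
# on success, the processor's result file is returned as an attachment
with open("standalone-result", "wb") as outfile:  # extension depends on the processor
    outfile.write(response.content)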
from flask import Flask
from flask_login import LoginManager
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address

import config

from backend.lib.database import Database
from backend.lib.logger import Logger
from backend.lib.queue import JobQueue

database_name = config.DB_NAME_TEST if hasattr(config.FlaskConfig, "DEBUG") and config.FlaskConfig.DEBUG == "Test" else config.DB_NAME

login_manager = LoginManager()
app = Flask(__name__)
log = Logger()
db = Database(logger=log, dbname=database_name, appname="frontend")
queue = JobQueue(logger=log, database=db)

# initialize openapi endpoint collector for later specification generation
from webtool.lib.openapi_collector import OpenAPICollector
openapi = OpenAPICollector(app)

# initialize rate limiter
limiter = Limiter(app, key_func=get_remote_address)

# make sure a secret key was set in the config file, for secure session cookies
if config.FlaskConfig.SECRET_KEY == "REPLACE_THIS":
    raise Exception("You need to set a FLASK_SECRET in config.py before running the web tool.")

# initialize login manager
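# Hypothetical usage sketch of the limiter configured above: flask_limiter's
# limit() decorator caps requests per client (keyed here by remote address).
# The route and limit string are illustrative, not part of this excerpt.
@app.route("/api/example/")
@limiter.limit("10 per minute")
def api_example():
    return "ok"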