def get_documents_url():
    """Route handler for /get_documents.

    Resolves the API user from the request args and returns the documents
    they are permitted to see; on any failure, logs and returns the error
    message string instead.
    """
    try:
        request_args = dict(flask.request.args)
        api_user = util.get_api_user(request_args)
        result = document_getter.get_documents(request_args, api_user, mongo_collection)
    except Exception as exc:
        msg = f"ERROR: from /get_documents: {exc}"
        util.log_email(msg, error=True)
        return msg
    return result
def archive_failed_url():
    """Route handler for /archive_failed (admin-only).

    Archives previously-failed requests via status_updater; non-admin users
    get an error. Any exception is logged and returned as a message string.
    """
    try:
        request_args = dict(flask.request.args)
        api_user = util.get_api_user(request_args)
        # Guard: this endpoint is restricted to admin users.
        if not api_user.get("admin"):
            raise Exception("/archive_failed is only available to admins.")
        result = status_updater.archive_failed(request_args, api_user, mongo_collection)
    except Exception as exc:
        msg = f"ERROR: from /archive_failed: {exc}"
        util.log_email(msg, error=True)
        return msg
    return result
def get_last_document_url():
    """Route handler for /get_last_document (admin-only).

    Fetches the most recent document via document_getter; non-admin users
    get an error. Any exception is logged and returned as a message string.
    """
    try:
        request_args = dict(flask.request.args)
        api_user = util.get_api_user(request_args)
        # Guard: this endpoint is restricted to admin users.
        if not api_user.get("admin"):
            raise Exception("/get_last_document is only available to admins.")
        result = document_getter.get_last_document(
            request_args, api_user, mongo_collection
        )
    except Exception as exc:
        msg = f"ERROR: from /get_last_document: {exc}"
        util.log_email(msg, error=True)
        return msg
    return result
def mongo_ingest(metadata, collection):
    """
    dict must be entire body of post request where one of the
    top-level keys is "metadata" which has for its value a dict
    containing the metadata to ingest

    Behavior by existing document state (matched on "archivedPath"):
      * no document yet  -> stamp bookkeeping fields, insert, return metadata
      * failed once      -> reset to "ready_for_pbs" (one retry), return doc
      * dry_run          -> return the existing doc unchanged
      * anything else    -> already archived; log and raise
    """
    existing = collection.find_one({"archivedPath": metadata["archivedPath"]})
    try:
        if not existing:
            # First sighting of this archivedPath: add the lifecycle
            # bookkeeping fields before inserting a fresh document.
            metadata.update(
                {
                    "when_ready_for_pbs": None,
                    "when_submitted_to_pbs": None,
                    "when_archival_queued": None,
                    "when_archival_started": None,
                    "when_archival_completed": None,
                    "failed_multiple": False,
                    "archival_status": "processing_metadata",
                }
            )
            metadata = scrub_dict_keys(metadata)
            inserted_id = collection.insert_one(metadata).inserted_id
            log_email(f"Metadata inserted with id: {inserted_id}")
            metadata["_id"] = str(inserted_id)
            return metadata

        failed_once = (
            "failed" in existing["archival_status"]
            and existing["failed_multiple"] is not True
        )
        if failed_once:
            # failed 1 time previously, allow this 1 retry
            mongo_set(
                "archivedPath",
                metadata["archivedPath"],
                {"archival_status": "ready_for_pbs", "failed_multiple": True},
                collection,
            )
            return collection.find_one({"archivedPath": metadata["archivedPath"]})

        if "dry_run" in existing["archival_status"]:
            # do nothing, return doc
            return existing

        # already archived and not a dry_run
        msg = f"{metadata['archivedPath']} already in Mongo."
        log_email(msg)
        raise Exception(msg)
    except Exception as e:
        raise Exception(f"Metadata insertion failed with error: {e}")
def validate_source_path(*, action: str, path: str, parent: Optional[str] = None):
    """Validate that *path* is a usable source directory and return it as str.

    Checks that *path* is non-empty, absolute, and an existing directory; if
    *parent* is given, also checks that *parent* is an absolute existing path
    and that *path* lies underneath it. Logs the planned action on success.

    :param action: "archive" (logs "will be archived") or anything else
        (logs "will be retrieved from the archive").
    :param path: candidate source directory.
    :param parent: optional directory that must contain *path*.
    :raises ValueError: if any validation step fails.
    :return: the validated path as a string.
    """
    # Explicit raises instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable all validation.
    if not path:
        raise ValueError(f"{action} source path ({path}) must not be empty")
    path = Path(path)
    if not path.is_absolute():
        raise ValueError(f"{action} source path ({path}) must be an absolute path")
    if not path.is_dir():
        raise ValueError(f"{action} source path ({path}) is not a directory")
    if parent:
        parent = Path(parent)
        if not (parent.exists() and parent.is_absolute()):
            raise ValueError(f"Parent directory {parent} isn't absolute")
        # Path.relative_to() raises ValueError on mismatch rather than
        # returning falsy, so the original `assert path.relative_to(parent)`
        # could never emit its custom message. Translate the exception.
        try:
            path.relative_to(parent)
        except ValueError:
            raise ValueError(f"{path} does not begin with '{parent}'") from None
    if action == "archive":
        log_email(f"{path} will be archived")
    else:
        log_email(f"{path} will be retrieved from the archive")
    return str(path)
def get_mongo_client():
    """Build a MongoClient from the ``config.mongo`` settings and verify it.

    Credentials are percent-escaped into a mongodb:// URI; the connection is
    probed with the ``ismaster`` admin command.

    :return: a connected :class:`pymongo.MongoClient`, or ``None`` (after
        logging the error) if any step fails.
    """
    # Read host/port BEFORE the try block: the except handler interpolates
    # them, and the original risked a NameError there if quote_plus() raised
    # first (e.g. config value missing), masking the real error.
    host = config.mongo.get("host")
    port = config.mongo.get("port")
    try:
        user = urllib.parse.quote_plus(
            config.mongo.get("user")
        )  # percent-escape string
        passwd = urllib.parse.quote_plus(
            config.mongo.get("passwd")
        )  # percent-escape string
        uri = f"mongodb://{user}:{passwd}@{host}:{port}"
        client_obj = pymongo.MongoClient(uri, authSource=config.mongo.get("authdb"))
        client_obj.admin.command(
            "ismaster"
        )  # tests for client != None and good connection
    except Exception as e:
        log_email(f"ERROR: could not connect to '{host}:{port}': {e}")
        return None
    return client_obj
def get_mongo_collection(
    client_obj=None,
    database_name=None,
    collection_name=None,
):
    """Return a pymongo collection object, creating a client if needed.

    :param client_obj: an existing MongoClient; if absent/invalid, a new one
        is created via get_mongo_client().
    :param database_name: database to use; defaults to config.mongo["db"].
    :param collection_name: collection to use; defaults to
        config.mongo["collection"].
    :return: the collection object, or ``None`` (after logging) on failure.
    """
    # Resolve config-driven defaults at CALL time. The original evaluated
    # config.mongo.get(...) in the def line, freezing the values at import
    # time and ignoring any later config changes.
    if database_name is None:
        database_name = config.mongo.get("db")
    if collection_name is None:
        collection_name = config.mongo.get("collection")
    if not isinstance(client_obj, pymongo.mongo_client.MongoClient):
        client_obj = get_mongo_client()
        if not client_obj:
            log_email("ERROR: get_mongo_client() failed.")
            return None
    try:
        db_obj = client_obj[database_name]
        collection_obj = db_obj[collection_name]
    except Exception as e:
        log_email(
            f"ERROR: could not connect to collection "
            + f"'{database_name}.{collection_name}': {e}"
        )
        return None
    return collection_obj
def submit_to_pbs(
    source: str,
    dest: str,
    action: str,
    group: Optional[str] = None,
    obj_id: Optional[str] = None,
):
    """Submit an archive or retrieve job to PBS via qsub.

    :param source: input path passed to the job as IN.
    :param dest: output path passed to the job as OUT.
    :param action: "archive" selects PBS_ARCHIVE_SCRIPT, anything else
        selects PBS_RETRIEVE_SCRIPT.
    :param group: passed to the job as GROUP.
    :param obj_id: passed to the job as ID.
    :return: the qsub job id string, or ``None`` (after logging) on failure.
    """
    # TODO: copy shell scripts to this repo and adjust curl for correct
    # name of endpoints
    script = (
        config.PBS_ARCHIVE_SCRIPT if action == "archive" else config.PBS_RETRIEVE_SCRIPT
    )
    # NOTE(review): shell=True with interpolated paths is injection-prone if
    # any argument can contain quotes or shell metacharacters; prefer an argv
    # list with shell=False once the scripts accept it.
    cmd = f'/usr/local/bin/qsub -v IN="{source}",OUT="{dest}",GROUP="{group}",ID="{obj_id}" "{script}"'
    log_email(f"Submitting job: {cmd}")
    proc = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    # Distinct names for the streams: the original unpacked stderr into `e`,
    # which the `except Exception as e` clause then shadowed — the final log
    # line only worked because the success path never entered the handler.
    out_bytes, err_bytes = proc.communicate()
    try:
        if proc.returncode == 0:
            job_id = out_bytes.decode().replace("\n", "")
        else:
            raise ValueError("error submitting job: " + err_bytes.decode())
    except Exception as exc:
        msg = f"pbs error: {exc}"
        log_email(msg)
        return None
    log_email(
        f"Submitted to PBS: {job_id}\nstderr:{err_bytes.decode()}\nstdout:{out_bytes.decode()}"
    )
    return job_id
def get_permitted_records_list(user_dict, cursor):
    """Return the records from *cursor* that *user_dict* may see.

    Admin users receive every record. Other users receive only records whose
    "system_groups" list shares at least one group with
    user_dict["groups_list"]. Malformed records (missing or non-list
    "system_groups") are logged and skipped.

    :param user_dict: user info dict; keys used: "admin", "groups_list".
    :param cursor: iterable of record dicts (e.g. a pymongo cursor).
    :raises Exception: if user_dict["groups_list"] is not a list.
    :return: list of permitted record dicts.
    """
    if user_dict.get("admin"):
        return list(cursor)
    user_groups_list = user_dict.get("groups_list")
    if not isinstance(user_groups_list, list):
        raise Exception(
            util.gen_msg(
                f"user_dict['groups_list'] '{user_groups_list}' is not a list; user_dict: '{user_dict}'"
            )
        )
    # Set membership makes the per-record intersection O(len(groups)).
    user_groups = set(user_groups_list)
    records = []
    for record in cursor:
        if "system_groups" not in record:
            util.log_email(
                util.gen_msg(f"record has no system_groups key; record: '{record}'")
            )
            continue
        allowed_groups_list = record.get("system_groups")
        if not isinstance(allowed_groups_list, list):
            util.log_email(
                util.gen_msg(
                    f"record['system_groups'] '{allowed_groups_list}' is not a list; record: '{record}'"
                )
            )
            # BUGFIX: skip the malformed record. The original fell through
            # and iterated the non-list value anyway (crashing on ints,
            # matching single characters on strings).
            continue
        if user_groups.intersection(allowed_groups_list):
            records.append(record)
    return records
def validate_destination_path(*, action: str, path: str, parent: Optional[str] = None):
    """Validate a destination path for an archive/retrieve action.

    Rules: the path must be non-empty and absolute; for non-"retrieve"
    actions it must not already exist; if *parent* is given it must be an
    absolute existing path containing *path*.

    :param action: "archive", "retrieve", etc.; controls the exists check
        and the log wording.
    :param path: candidate destination path.
    :param parent: optional directory that must contain *path*.
    :raises ValueError: if a validation step fails.
    :return: the validated path as a string, or ``False`` when a "retrieve"
        destination already exists (logged, not raised — callers treat a
        falsy result as "destination unusable").
    """
    # Explicit raises instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable all validation.
    if not path:
        raise ValueError(f"{action} destination path ({path}) must not be empty")
    log_email(f"received path: {path}")
    path = Path(path)
    if not path.is_absolute():
        raise ValueError(f"{action} destination path ({path}) must be an absolute path")
    if action != "retrieve" and path.exists():
        raise ValueError(f"{action} destination path ({path}) must not exist")
    if path.exists():
        # Only reachable for "retrieve" (other actions raised just above).
        log_email(f"path for {action} request {path} exists")
        return False
    if parent:
        parent = Path(parent)
        if not (parent.exists() and parent.is_absolute()):
            raise ValueError(f"Parent directory {parent} isn't absolute")
        # Path.relative_to() raises ValueError on mismatch rather than
        # returning falsy, so the original `assert path.relative_to(parent)`
        # could never emit its custom message. Translate the exception.
        try:
            path.relative_to(parent)
        except ValueError:
            raise ValueError(f"{path} does not begin with '{parent}'") from None
    if action == "archive":
        log_email(f"archiver will deposit files at {path}")
    else:
        log_email(f"archiver will retrieve files to {path}")
    return str(path)
import sys

import flask
import flask_session
import markupsafe

## local imports:
import config
import util
import status_updater
import document_getter

## init mongodb collection object for config.mongo['collection'];
## then can pass collection object to modules that need it:
mongo_collection = util.get_mongo_collection()
if not mongo_collection:
    util.log_email("ERROR: could not connect to collection.", error=True)
    sys.exit(3)

## initialize flask object:
app = flask.Flask(__name__.split(".")[0])

#############################################################################################################
## ROUTES:


@app.route("/archive", methods=["POST"])
def archive_url():
    url = "/archive"
    if flask.request.is_json:
        ## submitted parameters thru api call w/ json
        return f"ERROR: POST reached unimplemented route '{url}'; args: '{flask.request.json}'"
    else:
        ## submitted parameters thru web page form
def mongo_delete_doc(key, val, collection):
    """Delete one document from *collection* where *key* equals *val*.

    A key of "_id" is looked up as a bson ObjectId; any other key is matched
    on the raw value. The deletion is logged via log_email().
    """
    if key == "_id":
        query = {"_id": ObjectId(val)}
    else:
        query = {key: val}
    collection.delete_one(query)
    log_email("Deleting")
def retrieve_archived_directory(
    *, json_arg, api_user, collection, debug: bool = False
) -> str:
    """
    :description: Retrieve a number of items from the archive.

    :param json_arg: A decoded JSON string which is at its top level a
        dictionary. Must have the following keys: requested_dirs (list of
        document ObjectIds) and api_key. The delivery path is inferred from
        each document's archivedPath. An optional "debug" key overrides the
        *debug* parameter. (Docstring previously named this ``json_args``.)
    :param api_user: dict describing the authenticated API user; keys used:
        fname, username.
    :param collection: MongoDB collection holding the archive metadata.
    :param debug: Cause a dry-run of submitting to pbs; the request will be
        ignored.
    :return: a status message string (submission count, dry-run notice, or
        error text). The original ``-> None`` annotation was incorrect.
    """
    log_email(
        f"{api_user['fname']} ({api_user['username']}) retrieving: {json_arg['requested_dirs']}"
    )
    try:
        if "debug" in json_arg.keys():
            debug = json_arg["debug"]
        number_submitted = 0
        # [obj_id1, obj_id2]
        for obj_id in json_arg["requested_dirs"]:
            if not add_current_user(api_user, obj_id, collection):
                raise Exception(f"Could not add {api_user} to metadata for {obj_id}")
            metadata = get_document_by_objectid_1(obj_id, api_user, collection)
            source_path = metadata["archivedPath"]
            # Delivery location is derived from the archived path.
            destination_path = f"/fastscratch/recovered{source_path}"
            system_groups = metadata.get("system_groups")
            if not system_groups:
                raise Exception(f"Error getting 'system_groups' for obj_id '{obj_id}'")
            intersect = permitted_groups(api_user, metadata)
            if not intersect:
                log_email(
                    f"user {api_user['username']} does not have permission to retrieve {obj_id}"
                )
                continue
            if not debug:
                job_id = submit_to_pbs(
                    source_path, destination_path, "retrieve", intersect[0], obj_id
                )
                if "retrievals" not in metadata.keys():
                    metadata["retrievals"] = []
                retrievals = metadata["retrievals"]
                if job_id:
                    next_retrieval = {
                        "job_id": job_id,
                        "retrieval_status": "submitted",
                        "when_retrieval_submitted": get_timestamp(),
                    }
                    number_submitted += 1
                else:
                    next_retrieval = {
                        "job_id": None,
                        "retrieval_status": "failed",
                        "when_retrieval_failed": get_timestamp(),
                    }
                    log_email(f"Error submitting to pbs for {obj_id}", True)
                # Record this attempt (success or failure) on the document.
                retrievals.append(next_retrieval)
                mongo_set(
                    "archivedPath", source_path, {"retrievals": retrievals}, collection
                )
            else:
                # NOTE(review): a dry run returns after the FIRST requested
                # dir; any remaining entries are ignored — confirm intended.
                return f"Dry run request to retrieve {obj_id}. No submission to pbs."
        return_msg = (
            f"{number_submitted} out of {len(json_arg['requested_dirs'])}"
            + " retrieval requests successfully submitted."
        )
        log_email(return_msg)
        return return_msg
    except Exception as e:
        err_msg = f"Error processing retrieval request: {e}"
        log_email(err_msg)
        return err_msg
def archive_directory(json_arg, api_user, collection, debug: bool = False):
    """
    Validate an archive request, ingest its metadata into MongoDB, and
    submit the archival job to PBS (unless a dry run is requested).

    :param json_arg: A decoded JSON string which is at its top level a
        dictionary. Must have the following keys: requested_dest_dir,
        source_folder, and metadata. Additional keys are ignored; an
        optional "debug" key overrides the *debug* parameter.
    :param api_user: dict describing the authenticated API user; keys used:
        fname, lname, username, email.
    :param collection: MongoDB collection holding the archive metadata.
    :param debug: Cause a dry-run of submitting to PBS; the result will be
        metadata ingested and no submission to pbs.
    :return: a Flask-style response: ``{"id": ...}`` on success, or a
        (message, status-code) tuple otherwise. (The original ``-> None``
        annotation was incorrect and has been removed.)
    """
    try:
        # validate request
        request_error = request_invalid(json_arg)
        if request_error:
            raise ValueError(request_error)
    except Exception as e:
        log_email(f"Error processing request: {e}")
        return f"Error processing request: {e}", 400
    try:
        # validate and preprocess metadata
        metadata = process_metadata(json_arg, api_user)
        metadata_error = metadata_invalid(metadata)
        if metadata_error:
            raise ValueError(metadata_error)
    except Exception as e:
        return f"Error processing metadata: {e}", 400
    log_email(
        f"{api_user['fname']} {api_user['lname']} ({api_user['username']}) requesting"
        + f" to archive {metadata['source_folder']}"
    )
    try:
        # create and insert archivedPath
        metadata = insert_archived_path(metadata)
    except Exception as e:
        return (f"Error processing and/or inserting archivedPath: {e}", 400)
    try:
        # insert metadata into mongoDB
        # BUGFIX: mongo_ingest(metadata, collection) — the collection
        # argument was missing, so this call always raised a TypeError and
        # every request failed here with "Error ingesting metadata".
        metadata = mongo_ingest(metadata, collection)
    except Exception as e:
        return f"Error ingesting metadata: {e}", 400
    try:
        # validate tentative archivedPath
        source_path = metadata["source_folder"]
        destination_path = validate_destination_path(
            action="archive", path=metadata["archivedPath"], parent="/"
        )
        if destination_path:
            mongo_set(
                "archivedPath",
                destination_path,
                {
                    "when_ready_for_pbs": get_timestamp(),
                    "archival_status": "ready_for_pbs",
                },
                collection,
            )
        else:  # requested archivedPath not valid
            send_to_name = (
                f"{api_user['fname'].capitalize()} {api_user['lname'].capitalize()}"
            )
            send_email(api_user["email"], dup_archive_request_body(), send_to_name)
            raise Exception(
                f"archive destination path '{destination_path}' must not exist."
            )
    except Exception as e:
        # destination_path already in archive (or validation failed)
        source_path = metadata["source_folder"]
        destination_path = metadata["archivedPath"]
        status = metadata["archival_status"]
        if "completed" not in status:
            mongo_set(
                "archivedPath",
                destination_path,
                {
                    "archival_status": "failed",
                    "exception_caught": str(e),
                    "when_archival_failed": get_timestamp(),
                },
                collection,
            )
        msg = f"Error while validating tentative archivedPath: {e}"
        log_email(msg)
        return msg, 400
    try:
        if "debug" in json_arg.keys():
            debug = json_arg["debug"]
        if not debug:
            if is_valid_for_pbs(destination_path):
                mongo_set(
                    "archivedPath",
                    destination_path,
                    {"archival_status": "submitting"},
                    collection,
                )
                job_id = submit_to_pbs(source_path, destination_path, "archive")
                if job_id:  # successfully submitted
                    mongo_set(
                        "archivedPath",
                        destination_path,
                        {
                            "archival_status": "submitted",
                            "when_submitted_to_pbs": get_timestamp(),
                            "job_id": job_id,
                        },
                        collection,
                    )
                    return {"id": str(metadata["_id"])}
                else:  # failed
                    mongo_set(
                        "archivedPath",
                        destination_path,
                        {
                            "archival_status": "failed",
                            "when_archival_failed": get_timestamp(),
                            "job_id": job_id,
                        },
                        collection,
                    )
                    return "Submitting to pbs failed, please see logs.", 400
            else:
                status = metadata["archival_status"]
                msg = (
                    f"Archive request denied. Current status of "
                    + f"{metadata['archivedPath']}: {status}"
                )
                log_email(msg)
                return msg, 400
        else:
            if "completed" not in metadata["archival_status"]:
                mongo_set(
                    "archivedPath",
                    metadata["archivedPath"],
                    {"archival_status": "dry_run"},
                    collection,
                )
                return (
                    {
                        "message": f"Dry run request, metadata for '{metadata['archivedPath']}'"
                        + " present in mongo and not archived. Request not submitted"
                    },
                    200,
                )
            return (
                f"Dry run request and {metadata['archivedPath']} previously "
                + "archived. Request not submitted.",
                200,
            )
    except Exception as e:  # noqa: e722
        return f"Failed to send to queue with error: {e}", 400