def create_pipeline(
    create_default_locations=False,
    shared_path=None,
    api_username=None,
    api_key=None,
    remote_name=None,
):
    pipeline = {
        "uuid": am.get_setting("dashboard_uuid"),
        "description": "Archivematica on {}".format(platform.node()),
        "create_default_locations": create_default_locations,
        "shared_path": shared_path,
        "api_username": api_username,
        "api_key": api_key,
    }
    if remote_name is not None:
        pipeline["remote_name"] = remote_name
    LOGGER.info("Creating pipeline in storage service with %s", pipeline)
    url = _storage_service_url() + "pipeline/"
    try:
        with ss_api_timer(function="create_pipeline"):
            response = _storage_api_session().post(url, json=pipeline)
            response.raise_for_status()
    except requests.exceptions.RequestException as e:
        LOGGER.warning(
            "Unable to create Archivematica pipeline in storage service from %s because %s",
            pipeline,
            e,
            exc_info=True,
        )
        raise
    return True
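
# A minimal usage sketch for create_pipeline (the shared path and
# credentials below are illustrative placeholders, not values from this
# codebase):
#
#     create_pipeline(
#         create_default_locations=True,
#         shared_path="/var/archivematica/sharedDirectory",
#         api_username="dashboard",
#         api_key="secret",
#     )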
def request_reingest(package_uuid, reingest_type, processing_config):
    """
    Requests `package_uuid` for reingest in this pipeline.

    `reingest_type` determines what files will be copied for reingest,
    defined by ReingestAIPForm.REINGEST_CHOICES.

    Returns a dict: {'error': [True|False], 'message': '<error message>'}
    """
    api_request = {
        "pipeline": am.get_setting("dashboard_uuid"),
        "reingest_type": reingest_type,
        "processing_config": processing_config,
    }
    url = _storage_service_url() + "file/" + package_uuid + "/reingest/"
    try:
        with ss_api_timer(function="request_reingest"):
            response = _storage_api_slow_session().post(url, json=api_request)
    except requests.ConnectionError:
        LOGGER.exception("Could not connect to storage service")
        return {"error": True, "message": "Could not connect to storage service"}
    except requests.exceptions.RequestException:
        LOGGER.exception("Unable to reingest %s", package_uuid)
        try:
            # ``response`` is unbound here if the POST itself raised; the
            # resulting NameError falls through to the except below.
            return response.json()
        except Exception:
            return {"error": True}
    return response.json()
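
# A usage sketch (the UUID is a made-up placeholder; valid reingest types
# are defined by ReingestAIPForm.REINGEST_CHOICES, referenced in the
# docstring above):
#
#     result = request_reingest(
#         "77adb748-8d9c-47ec-b593-53465749ce0e",
#         reingest_type="full",
#         processing_config="default",
#     )
#     if result.get("error"):
#         LOGGER.error(result.get("message", "reingest request failed"))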
def get_file_metadata(**kwargs):
    url = _storage_service_url() + "file/metadata/"
    with ss_api_timer(function="get_file_metadata"):
        response = _storage_api_slow_session().get(url, params=kwargs)
    if 400 <= response.status_code < 500:
        raise ResourceNotFound("No file found for arguments: {}".format(kwargs))
    return response.json()
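
# Keyword arguments are forwarded verbatim as query parameters to the
# ``file/metadata/`` endpoint. A sketch (the filter name and value are
# illustrative, not a documented contract):
#
#     try:
#         metadata = get_file_metadata(relative_path="backlog/transfer-1/")
#     except ResourceNotFound:
#         metadata = None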
def remove_files_from_transfer(transfer_uuid):
    """Used in ``devtools:tools/reindex-backlogged-transfers``.

    TODO: move tool to Dashboard management commands.
    """
    url = _storage_service_url() + "file/" + transfer_uuid + "/contents/"
    with ss_api_timer(function="remove_files_from_transfer"):
        _storage_api_slow_session().delete(url)
def post_store_aip_callback(uuid):
    url = _storage_service_url() + "file/" + uuid + "/send_callback/post_store/"
    with ss_api_timer(function="post_store_aip_callback"):
        response = _storage_api_slow_session().get(url)
    try:
        return response.json()
    except Exception:
        return response.text
def index_backlogged_transfer_contents(transfer_uuid, file_set):
    """Used by ``devtools:tools/reindex-backlogged-transfers``.

    TODO: move tool to Dashboard management commands.
    """
    url = _storage_service_url() + "file/" + transfer_uuid + "/contents/"
    with ss_api_timer(function="index_backlogged_transfer_contents"):
        response = _storage_api_slow_session().put(url, json=file_set)
    if 400 <= response.status_code < 500:
        raise Error("Unable to add files to transfer: {}".format(response.text))
def extract_file(uuid, relative_path, save_path):
    """
    Fetches `relative_path` from package with `uuid` and saves to `save_path`.
    """
    url = _storage_service_url() + "file/" + uuid + "/extract_file/"
    params = {"relative_path_to_file": relative_path}
    with ss_api_timer(function="extract_file"):
        response = _storage_api_slow_session().get(url, params=params, stream=True)
    chunk_size = 1024 * 1024
    with open(save_path, "wb") as f:
        # Stream the response body to disk in 1 MiB chunks so large files
        # are never held in memory in full.
        for chunk in response.iter_content(chunk_size):
            f.write(chunk)
    os.chmod(save_path, 0o660)
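
# A usage sketch (the UUID and paths are placeholders; package-relative
# paths typically begin with the package name):
#
#     extract_file(
#         uuid="8a2b6a2c-6d5e-4f3a-9b1c-0d2e4f6a8b0c",
#         relative_path="my-aip/data/METS.8a2b6a2c-6d5e-4f3a-9b1c-0d2e4f6a8b0c.xml",
#         save_path="/tmp/mets.xml",
#     )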
def request_file_deletion(uuid, user_id, user_email, reason_for_deletion):
    """
    Requests deletion of the AIP with `uuid` and returns the server's JSON
    response.
    """
    api_request = {
        "event_reason": reason_for_deletion,
        "pipeline": am.get_setting("dashboard_uuid"),
        "user_email": user_email,
        "user_id": user_id,
    }
    url = _storage_service_url() + "file/" + uuid + "/delete_aip/"
    with ss_api_timer(function="request_file_deletion"):
        response = _storage_api_session().post(url, json=api_request)
    return response.json()
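
# A usage sketch (all values are placeholders; ``user_id`` and
# ``user_email`` identify the Dashboard user recorded against the deletion
# request):
#
#     response = request_file_deletion(
#         uuid="8a2b6a2c-6d5e-4f3a-9b1c-0d2e4f6a8b0c",
#         user_id=1,
#         user_email="admin@example.com",
#         reason_for_deletion="Superseded by a corrected AIP",
#     )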
def get_pipeline(uuid):
    url = _storage_service_url() + "pipeline/" + uuid + "/"
    try:
        with ss_api_timer(function="get_pipeline"):
            response = _storage_api_session().get(url)
        if response.status_code == 404:
            LOGGER.warning(
                "This Archivematica instance is not registered with the "
                "storage service or has been disabled."
            )
        response.raise_for_status()
    except requests.exceptions.RequestException:
        LOGGER.warning("Error fetching pipeline", exc_info=True)
        raise
    return response.json()
def browse_location(uuid, path):
    """
    Browse files in a location. Encodes path in base64 for transmission,
    returns decoded entries.
    """
    path = b64encode_string(path)
    url = _storage_service_url() + "location/" + uuid + "/browse/"
    params = {"path": path}
    with ss_api_timer(function="browse_location"):
        response = _storage_api_session().get(url, params=params)
    browse = response.json()
    browse["entries"] = list(map(b64decode_string, browse["entries"]))
    browse["directories"] = list(map(b64decode_string, browse["directories"]))
    browse["properties"] = {
        b64decode_string(k): v for k, v in browse.get("properties", {}).items()
    }
    return browse
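
# A usage sketch (the UUID and path are placeholders). The base64
# round-trip on paths is handled internally, so callers pass and receive
# plain strings:
#
#     listing = browse_location(
#         "b3f3e9a0-1c2d-4e5f-8a9b-0c1d2e3f4a5b", "/home"
#     )
#     for directory in listing["directories"]:
#         print(directory)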
def copy_files(source_location, destination_location, files):
    """
    Copies `files` from `source_location` to `destination_location` using SS.

    source_location/destination_location: Dict with Location information,
    result of a call to get_location.
    files: List of dicts with source and destination paths relative to
    source_location and destination_location, respectively. All other
    fields ignored.
    """
    pipeline = get_pipeline(am.get_setting("dashboard_uuid"))
    move_files = {
        "origin_location": source_location["resource_uri"],
        "files": files,
        "pipeline": pipeline["resource_uri"],
    }
    # Here we attempt to decode the 'source' attributes of each move-file to
    # Unicode prior to JSON serialization (historically, prior to passing to
    # Slumber's ``post`` method), which chokes in certain specific cases,
    # specifically where the JavaScript of the dashboard has base-64-encoded
    # a Latin-1-encoded string.
    for file_ in move_files["files"]:
        if not isinstance(file_["source"], bytes):
            # Already a Unicode string; nothing to decode (on Python 3,
            # calling ``decode`` on ``str`` would raise AttributeError).
            continue
        try:
            file_["source"] = file_["source"].decode("utf8")
        except UnicodeDecodeError:
            try:
                file_["source"] = file_["source"].decode("latin-1")
            except UnicodeError:
                pass
    url = _storage_service_url() + "location/" + destination_location["uuid"] + "/"
    try:
        with ss_api_timer(function="copy_files"):
            response = _storage_api_slow_session().post(url, json=move_files)
        response.raise_for_status()
        return (response.json(), None)
    except requests.exceptions.RequestException as e:
        LOGGER.warning("Unable to move files with %s because %s", move_files, e)
        return (None, e)
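
# A usage sketch (locations would normally come from get_location(); the
# paths are illustrative):
#
#     files = [
#         {"source": "transfer-1/objects/image.tif",
#          "destination": "copies/image.tif"},
#     ]
#     result, error = copy_files(source_loc, dest_loc, files)
#     if error is not None:
#         LOGGER.warning("Copy failed: %s", error)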
def get_file_info(
    uuid=None,
    origin_location=None,
    origin_path=None,
    current_location=None,
    current_path=None,
    package_type=None,
    status=None,
):
    """
    Returns a list of files, optionally filtered by parameters.

    Queries the storage service and returns a list of files, optionally
    filtered by origin location/path, current location/path, package_type,
    or status.
    """
    # TODO Need a better way to deal with mishmash of relative and absolute
    # paths coming in
    return_files = []
    url = _storage_service_url() + "file/"
    params = {
        "uuid": uuid,
        "origin_location": origin_location,
        "origin_path": origin_path,
        "current_location": current_location,
        "current_path": current_path,
        "package_type": package_type,
        "status": status,
        "offset": 0,
    }
    # Page through the results, following the API's offset/limit pagination
    # until no "next" page is reported.
    while True:
        with ss_api_timer(function="get_file_info"):
            response = _storage_api_slow_session().get(url, params=params)
        files = response.json()
        return_files += files["objects"]
        if not files["meta"]["next"]:
            break
        params["offset"] += files["meta"]["limit"]
    LOGGER.debug("Files returned: %s", return_files)
    return return_files
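
# A usage sketch: list every uploaded AIP known to the storage service
# (the filter values shown are illustrative of the SS API's vocabulary):
#
#     aips = get_file_info(package_type="AIP", status="UPLOADED")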
def location_description_from_slug(aip_location_slug):
    """Retrieve the location resource description.

    The location slug can be retrieved by microservices using the
    %AIPsStore% argument. This helper enables easy access to the resource
    description provided at the URL.

    Example slugs:

    * /api/v2/location/3e796bef-0d56-4471-8700-eeb256859811/
    * /api/v2/location/default/AS/

    :param string aip_location_slug: storage location URI slug
    :return: storage service location description
    :rtype: dict
    """
    API_SLUG = "/api/v2/"
    JSON_MIME = "application/json"
    CONTENT_TYPE_HDR = "content-type"
    service_uri = _storage_service_url()
    service_uri = service_uri.replace(API_SLUG, aip_location_slug)
    response = {}
    with ss_api_timer(function="get_location"):
        response = _storage_api_session().get(service_uri)
    if not response or response.status_code != 200:
        LOGGER.warning(
            "Cannot retrieve storage location description from storage service: %s",
            response.status_code,
        )
        return {}
    if not response.headers.get(CONTENT_TYPE_HDR) == JSON_MIME:
        LOGGER.warning(
            "Received a successful response code (%s), but an invalid content type: %s",
            response.status_code,
            response.headers.get(CONTENT_TYPE_HDR),
        )
        return {}
    return response.json()
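
# A usage sketch, reusing the default-AIP-store slug from the docstring:
#
#     description = location_description_from_slug(
#         "/api/v2/location/default/AS/"
#     )
#     if description:
#         LOGGER.debug("AIP store UUID: %s", description.get("uuid"))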
def get_location(path=None, purpose=None, space=None):
    """
    Returns a list of storage locations, filtered by parameters.

    Queries the storage service and returns a list of storage locations,
    optionally filtered by purpose, containing space, or path.

    purpose: How the storage is used. Should reference storage service
        purposes, found in storage_service/locations/models/location.py
    path: Path to location. If a space is passed in, paths starting with /
        have the space's path stripped.
    """
    return_locations = []
    if space and path:
        path = _storage_relative_from_absolute(path, space["path"])
        space = space["uuid"]
    pipeline = get_pipeline(am.get_setting("dashboard_uuid"))
    if pipeline is None:
        return None
    url = _storage_service_url() + "location/"
    params = {
        "pipeline__uuid": pipeline["uuid"],
        "relative_path": path,
        "purpose": purpose,
        "space": space,
        "offset": 0,
    }
    # Page through results using the API's offset/limit pagination.
    while True:
        with ss_api_timer(function="get_location"):
            response = _storage_api_session().get(url, params=params)
        locations = response.json()
        return_locations += locations["objects"]
        if not locations["meta"]["next"]:
            break
        params["offset"] += locations["meta"]["limit"]
    LOGGER.debug("Storage locations returned: %s", return_locations)
    return return_locations
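
# A usage sketch: list this pipeline's transfer source locations ("TS" is
# the storage service's purpose code for transfer sources; see
# storage_service/locations/models/location.py, as the docstring notes):
#
#     transfer_sources = get_location(purpose="TS")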
def reindex_file(transfer_uuid):
    url = _storage_service_url() + "file/" + transfer_uuid + "/reindex/"
    with ss_api_timer(function="reindex_file"):
        response = _storage_api_slow_session().post(url)
    response.raise_for_status()
    return response.json()
def get_default_location(purpose):
    url = _storage_service_url() + "location/default/{}".format(purpose)
    with ss_api_timer(function="get_default_location"):
        response = _storage_api_session().get(url)
    response.raise_for_status()
    return response.json()
def create_file(
    uuid,
    origin_location,
    origin_path,
    current_location,
    current_path,
    package_type,
    size,
    update=False,
    related_package_uuid=None,
    events=None,
    agents=None,
    aip_subtype=None,
):
    """Creates a new file.

    Note: for backwards compatibility reasons, the SS API calls "packages"
    "files" and this function should be read as ``create_package``.

    ``origin_location`` and ``current_location`` should be URIs for the
    Storage Service.

    Returns a dict with the decoded JSON response from the SS API. It may
    raise ``RequestException`` if the SS API call fails.
    """
    pipeline = get_pipeline(am.get_setting("dashboard_uuid"))
    if pipeline is None:
        raise ResourceNotFound("Pipeline not available")
    if events is None:
        events = []
    if agents is None:
        agents = []
    new_file = {
        "uuid": uuid,
        "origin_location": origin_location,
        "origin_path": origin_path,
        "current_location": current_location,
        "current_path": current_path,
        "package_type": package_type,
        "aip_subtype": aip_subtype,
        "size": size,
        "origin_pipeline": pipeline["resource_uri"],
        "related_package_uuid": related_package_uuid,
        "events": events,
        "agents": agents,
    }
    LOGGER.info("Creating file with %s", new_file)
    errmsg = "Unable to create file from %s because %s"
    ret = None
    if update:
        try:
            session = _storage_api_slow_session()
            new_file["reingest"] = pipeline["uuid"]
            url = _storage_service_url() + "file/" + uuid + "/"
            with ss_api_timer(function="create_file"):
                response = session.put(url, json=new_file)
            response.raise_for_status()
        except requests.exceptions.RequestException as err:
            LOGGER.warning(errmsg, new_file, err)
            raise
        else:
            ret = response.json()
    else:
        try:
            session = _storage_api_slow_session()
            url = _storage_service_url() + "file/"
            with ss_api_timer(function="create_file"):
                response = session.post(url, json=new_file)
            response.raise_for_status()
            ret = response.json()
        except requests.exceptions.RequestException as err:
            LOGGER.warning(errmsg, new_file, err)
            raise
    LOGGER.info("Status code of create file/package request: %s", response.status_code)
    return ret
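
# A usage sketch for registering a freshly stored AIP (all identifiers and
# paths are placeholders; the location values must be SS resource URIs, as
# the docstring requires):
#
#     create_file(
#         uuid="8a2b6a2c-6d5e-4f3a-9b1c-0d2e4f6a8b0c",
#         origin_location="/api/v2/location/<processing-location-uuid>/",
#         origin_path="my-aip-8a2b6a2c-6d5e-4f3a-9b1c-0d2e4f6a8b0c.7z",
#         current_location="/api/v2/location/<aip-store-uuid>/",
#         current_path="my-aip-8a2b6a2c-6d5e-4f3a-9b1c-0d2e4f6a8b0c.7z",
#         package_type="AIP",
#         size=1048576,
#     )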