def post(self) -> None:
    """ POST: returns size of passed directory """
    # print('------------- Disk storage checker entered-------------')
    env_storage_limit = "WORKSPACE_STORAGE_LIMIT"
    try:
        result = json.loads(self.request.body.decode('utf-8'))
        max_disk_storage_gb = os.environ.get(env_storage_limit)
        # max_disk_storage_gb = 0
        if max_disk_storage_gb is not None:
            max_disk_storage_gb = int(max_disk_storage_gb)

        # run update in background -> sometimes it needs too much time to run synchronously
        thread = threading.Thread(target=update_workspace_metadata)
        thread.daemon = True
        thread.start()

        size_in_gb = get_workspace_size()
        if size_in_gb > max_disk_storage_gb:
            # sleep 50 ms -> metadata file should have been updated by then, otherwise use old metadata
            time.sleep(0.05)
            size_in_gb = get_workspace_size()
            if size_in_gb > (max_disk_storage_gb * 1.5):
                # Automatic cleanup
                try:
                    from lab_client import Environment
                    Environment().cleanup()
                except Exception as ex:
                    log.info("Failed to cleanup environment: %s", ex)
                # TODO: Do not do a full workspace cleanup -> bad side effects
                # Fallback - clean full workspace folder
                # cleanup_folder(WORKSPACE_PATH)

        number_shortened = size_in_gb
        if "." in str(size_in_gb):
            # quick check if it is a decimal -> shorten to two decimal places
            number_shortened = str(size_in_gb).split(".")[0] + '.' + str(size_in_gb).split(".")[1][:2]

        result["workspaceSize"] = number_shortened
        result["restrictedSize"] = max_disk_storage_gb
        result["status"] = 1
        if size_in_gb > max_disk_storage_gb:
            print("You have exceeded the limit of available disk storage assigned to your workspace. Please clean up.")
            result["status"] = 0
        self.finish(json.dumps(result))
    except Exception as e:
        result['status'] = 3
        print("No environment variable set for", env_storage_limit)
        self.finish(json.dumps(result))
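# Usage sketch (assumption, not part of the original handlers): how a client could call
# the storage-check handler above and interpret the "status" field it returns
# (1 = within limit, 0 = limit exceeded, 3 = no WORKSPACE_STORAGE_LIMIT configured).
# The endpoint path "/tooling/storage-check" is a hypothetical placeholder; the real
# route depends on how the handler is registered.
import requests

def check_workspace_storage(base_url: str) -> dict:
    response = requests.post(base_url + "/tooling/storage-check", json={})
    result = response.json()
    if result.get("status") == 0:
        print("Workspace uses " + str(result.get("workspaceSize")) + " GB of the allowed "
              + str(result.get("restrictedSize")) + " GB - please clean up.")
    return result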
def put(self) -> None:
    try:
        from lab_client import Environment
        Environment().cleanup()
    except Exception:
        # Fallback - clean full workspace folder
        cleanup_folder(WORKSPACE_PATH)
def upload_file(project, path, type):
    """
    Uploads a file from the PATH to the remote storage.
    """
    try:
        from lab_client import Environment
        Environment(project=project).upload_file(path, type)
    except Exception:
        log.exception("Failed to upload file " + str(path) + " to project: " + str(project))
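# Usage sketch for the upload helper above, assuming the lab_client package is installed
# and the Lab endpoint/token are already configured. The project name, file path, and the
# DataType.DATASET constant are placeholders/assumptions for illustration.
from lab_client import Environment

env = Environment(project="my-project")
if env.is_connected():
    key = env.upload_file("./data/features.csv", env.DataType.DATASET)
    print("Uploaded file with key: " + str(key))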
def cleanup(project):
    """
    Cleanup environment folder to reduce disk space usage.
    Removes all files larger than 50 MB that haven't been used within the last 3 days.
    """
    try:
        from lab_client import Environment
        Environment(project=project).cleanup()
    except Exception:
        log.exception("Failed to cleanup environment.")
def backup_folder(folder_to_backup: str, cache_folder: str):
    """Backup a folder to the user project."""
    if not os.path.isdir(folder_to_backup):
        log.warning("Folder does not exist: " + folder_to_backup)
        return

    # Check if dir size has changed
    metadata_dict = {
        "folder_size": 0,
        "last_backup": 0,
        "folder": folder_to_backup
    }

    metadata_file = os.path.join(cache_folder, os.path.basename(folder_to_backup) + ".json")
    if os.path.isfile(metadata_file):
        metadata_dict = file_utils.load_dict_json(metadata_file)

    current_folder_size = file_utils.folder_size(folder_to_backup)
    if metadata_dict["folder_size"] == current_folder_size:
        log.info("No Backup since folder size has not changed.")
        return

    metadata_dict["folder_size"] = current_folder_size
    metadata_dict["last_backup"] = int(round(time.time() * 1000))

    # Initialize environment with user project and in temp directory
    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)

    # Only backup if environment is connected
    if not env.is_connected():
        log.warning("Failed to connect to Lab Instance. Cannot backup folder.")
        env.print_info()
        return

    archive_file_path = None
    backup_key = None
    try:
        # Package folder to temp dir but ignore files larger than 50 MB and the environment directory
        archive_file_path = file_handler_utils.tar_folder(folder_to_backup,
                                                          max_file_size=MAX_FILE_SIZE_IN_MB,
                                                          exclude=["**/environment/*"])
        backup_key = env.upload_file(archive_file_path,
                                     data_type=env.DataType.BACKUP,
                                     track_event=False)
    except Exception as e:
        # Failsafe backup
        print("Failed to backup workspace")
        print(e)

    if archive_file_path:
        os.remove(archive_file_path)  # remove archive file after upload

    if backup_key:
        print(backup_key)
        # Backup successful -> save folder metadata
        file_utils.save_dict_json(metadata_file, metadata_dict)

        # Delete old backups, keep only the N latest versions
        env.file_handler.delete_remote_file(env.file_handler.remove_version_from_key(backup_key),
                                            keep_latest_versions=KEEP_N_LATEST_BACKUPS)
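# Usage sketch for backup_folder (the paths below are placeholders; the actual workspace
# home and cache locations depend on the deployment): back up the workspace home folder
# and keep the size/last-backup metadata in a cache folder so unchanged folders are skipped.
WORKSPACE_HOME = "/workspace"                      # assumed path
BACKUP_CACHE_FOLDER = "/workspace/.config/backup"  # assumed path

os.makedirs(BACKUP_CACHE_FOLDER, exist_ok=True)
backup_folder(WORKSPACE_HOME, BACKUP_CACHE_FOLDER)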
def get_file(project, unpack, key):
    """
    Returns the path to the file for the given KEY (either a storage key or URL).
    If the file is not available locally, it is downloaded from the remote storage.
    """
    try:
        if not unpack:
            unpack = False
        from lab_client import Environment
        file_path = Environment(project=project).get_file(key, unpack=unpack)
        if file_path:
            log.info("Downloaded file to " + str(file_path))
    except Exception:
        log.exception("Failed to get file with key: " + str(key) + " from project: " + str(project))
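# Usage sketch for fetching a file by key, assuming lab_client is configured. The project
# name and key are placeholders; keys follow the "<data-type>/<file-name>" convention used
# in the local file handling test below (e.g. "datasets/file-1.txt").
from lab_client import Environment

env = Environment(project="my-project")
local_path = env.get_file("datasets/file-1.txt", unpack=False)
print("File available at: " + str(local_path))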
def test_local_file_handling():
    log.info("Start local file handling tests.")
    local_env = Environment(root_folder="temp")
    local_env.print_info()

    log.info("Create test resources (files & folders)")
    file_1_name = "file-1.txt"
    file_1_path = os.path.join(local_env.datasets_folder, file_1_name)
    file_1_content = "test"
    with open(file_1_path, 'w') as f:
        f.write(file_1_content)

    # get file from local
    local_file_key = "datasets/" + file_1_name
    log.info("Get local file with key " + local_file_key)
    local_file = local_env.get_file(local_file_key)
    assert local_file

    # check file content
    with open(local_file, 'r') as f:
        file_content = f.read()
    assert (file_content == file_1_content)
def restore_backup(folder_to_restore: str):
    """Restore a folder from a backup."""
    if not os.path.exists(folder_to_restore):
        os.makedirs(folder_to_restore)

    # Check if folder is empty
    if os.listdir(folder_to_restore):
        log.info("Folder " + folder_to_restore + " is not empty, will not attempt to restore backup.")
        return

    log.info("Folder " + folder_to_restore + " is empty, will try to restore backup.")

    # Initialize environment with user project and in temp directory
    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)

    # Download the latest backup and unpack it into the specified folder
    backup_key = "backups/" + os.path.basename(folder_to_restore) + ".tar"
    file_path = env.get_file(backup_key)
    if file_path:
        file_handler_utils.extract_tar(file_path=file_path,
                                       unpack_path=folder_to_restore,
                                       remove_if_exists=False)
    else:
        log.warning("Failed to get backup with key " + backup_key)
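# Usage sketch for restore_backup (placeholder path): on startup, try to restore the
# workspace home folder from its latest backup before it is used. The backup key is
# derived from the folder's basename, so "/workspace" is looked up as
# "backups/workspace.tar", matching the archives produced by backup_folder above.
restore_backup("/workspace")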
def setup_function(function):
    global env
    log.info("Initialize environment")
    test_project = "test-" + str(int(round(time.time() * 1000)))
    env = Environment(project=test_project,
                      root_folder="temp",
                      lab_endpoint=LAB_ENDPOINT,
                      lab_api_token=auth_token)
    env.print_info()

    log.info("Create test project: " + test_project)
    env.lab_handler.lab_api.create_project(LabProjectConfig(name=test_project))
    log.info("Project created.")

    env = Environment(project=test_project,
                      root_folder="temp",
                      lab_endpoint=LAB_ENDPOINT,
                      lab_api_token=auth_token)
    env.print_info()
from subprocess import call
import threading
import os
import logging, sys

from unified_model import model_handler
from unified_model.server.api_server import run
from lab_client import Environment

logging.basicConfig(stream=sys.stdout,
                    format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
log = logging.getLogger(__name__)

log.info("Starting model service...")

env = Environment()
env.print_info()

default_model_key = os.getenv("MODEL_KEY", "/default_model")


def key_resolver(model_key):
    if os.path.exists(model_key):
        # model key == path
        return model_key
    else:
        # resolve with lab
        # also unpack if possible? , unpack=True
        return env.get_file(model_key)


# set key resolver
model_handler.key_resolver = key_resolver
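# Usage sketch for the key resolver above; both keys are placeholders. A key that exists
# on the local filesystem is returned unchanged, anything else is treated as a remote
# storage key and fetched through the Lab environment (env.get_file may return None if
# the key cannot be resolved).
print(key_resolver("/default_model"))        # local path -> returned as-is if it exists
print(key_resolver("models/my-model.zip"))   # assumed remote key -> resolved via env.get_file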
if args.mode == "clean":
    if WORKSPACE_STORAGE_LIMIT is not None:
        try:
            # Wait for a random time (up to 1 hour) so that not all workspaces check at the same time
            time.sleep(random.randint(0, 60) * 60)
            log.info("Run storage cleanup check.")
            max_disk_storage_gb = int(WORKSPACE_STORAGE_LIMIT)
            inactive_days = jupyterdiskcheck_plugin.get_inactive_days()
            size_in_gb = jupyterdiskcheck_plugin.get_workspace_size()

            if inactive_days <= 1:
                # Backup workspace metadata if user is active -> used in Lab for tracking of activity
                try:
                    from lab_client import Environment
                    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)
                    # Only backup if environment is connected
                    if not env.is_connected():
                        log.warning("Failed to connect to Lab Instance. Cannot upload metadata backup file.")
                        env.print_info()
                    else:
                        env.upload_file(os.path.join(WORKSPACE_CONFIG_FOLDER, "metadata.json"),
                                        data_type=env.DataType.BACKUP,
                                        track_event=False)
                except Exception as e:
                    # Failsafe backup
                    print("Failed to backup workspace metadata.")
                    print(e)

            # only use inactive cleanup if more than 50% of actual limit
            if (size_in_gb and size_in_gb > (max_disk_storage_gb * STORAGE_CLEANUP_THRESHOLD)
                    and inactive_days and inactive_days > LAST_USER_ACTIVITY):
                # Automatic cleanup