def post(self) -> None:
        """
            POST: returns size of passed directory
        """

        env_storage_limit = "WORKSPACE_STORAGE_LIMIT"

        result = {}

        try:
            result = json.loads(self.request.body.decode('utf-8'))
            max_disk_storage_gb = os.environ.get(env_storage_limit)

            if max_disk_storage_gb is not None:
                max_disk_storage_gb = int(max_disk_storage_gb)

                # run update in background -> sometimes it might take too much time to run
                thread = threading.Thread(target=update_workspace_metadata)
                thread.daemon = True
                thread.start()

                size_in_gb = get_workspace_size()

                if size_in_gb > max_disk_storage_gb:
                    # sleep 50 ms -> metadata file should have been updated, otherwise use old metadata
                    time.sleep(0.05)
                    size_in_gb = get_workspace_size()

                if size_in_gb > (max_disk_storage_gb * 1.5):
                    # Automatic cleanup
                    try:
                        from lab_client import Environment
                        Environment().cleanup()
                    except Exception as ex:
                        log.info("Failed to clean up environment: %s", ex)
                        # TODO: Do not do a full workspace cleanup -> bad side effects
                        # Fallback - clean full workspace folder
                        # cleanup_folder(WORKSPACE_PATH)

                # Truncate the reported size to two decimal places, e.g. 5.6789 -> "5.67"
                number_shortened = size_in_gb
                if "." in str(size_in_gb):  # quick check if it is a decimal
                    integer_part, decimal_part = str(size_in_gb).split(".")
                    number_shortened = integer_part + "." + decimal_part[:2]

                result["workspaceSize"] = number_shortened
                result["restrictedSize"] = max_disk_storage_gb
                result["status"] = 1

                if size_in_gb > max_disk_storage_gb:
                    print(
                        "You have exceeded the limit of available disk storage assigned to your workspace. Please clean up."
                    )
                    result["status"] = 0

            self.finish(json.dumps(result))

        except Exception:
            result["status"] = 3
            log.exception("Failed to determine disk storage. Check the environment variable " + env_storage_limit + ".")
            self.finish(json.dumps(result))
def put(self) -> None:
        """ PUT: triggers a cleanup of the workspace environment. """
        try:
            from lab_client import Environment
            Environment().cleanup()
        except Exception:
            log.exception("Failed to cleanup environment.")
            # Fallback - clean full workspace folder
            cleanup_folder(WORKSPACE_PATH)
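
# A minimal client-side sketch for the two handlers above. The route and port are
# assumptions for illustration; the response fields match the handler's output.
import json

import requests

response = requests.post("http://localhost:8090/tooling/storage-check", data=json.dumps({}))
info = response.json()
print("Used " + str(info["workspaceSize"]) + " of " + str(info["restrictedSize"]) + " GB (status " + str(info["status"]) + ")")

requests.put("http://localhost:8090/tooling/storage-check")  # triggers a workspace cleanup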
def upload_file(project, path, type):
    """ Uploads a file from the PATH to the remote storage. """
    try:
        from lab_client import Environment
        Environment(project=project).upload_file(path, type)
    except Exception:
        log.exception("Failed to upload file " + str(path) + " to project: " +
                      str(project))
def cleanup(project):
    """ Cleanup environment folder to reduce disk space usage.
    Removes all files with more than 50 MB that haven't been used for the last 3 days. """
    try:
        from lab_client import Environment
        Environment(project=project).cleanup()
    except Exception:
        log.exception("Failed to cleanup environment.")
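
# Hedged usage sketch for the helpers above; the project name, path, and data
# type value are hypothetical examples, not taken from the source:
upload_file("demo-project", "/workspace/datasets/data.csv", "dataset")
cleanup("demo-project")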
def backup_folder(folder_to_backup: str, cache_folder: str):
    """Backup a folder to the user project."""

    if not os.path.isdir(folder_to_backup):
        log.warning("Folder does not exist: " + folder_to_backup)
        return

    # Check if dir size has changed
    metadata_dict = {
        "folder_size": 0,
        "last_backup": 0,
        "folder": folder_to_backup
    }

    metadata_file = os.path.join(cache_folder, os.path.basename(folder_to_backup) + ".json")
    if os.path.isfile(metadata_file):
        metadata_dict = file_utils.load_dict_json(metadata_file)

    current_folder_size = file_utils.folder_size(folder_to_backup)
    if metadata_dict["folder_size"] == current_folder_size:
        log.info("No Backup since folder size has not changed.")
        return

    metadata_dict["folder_size"] = current_folder_size
    metadata_dict["last_backup"] = int(round(time.time() * 1000))

    # Initialize environment with user project and in temp directory
    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)

    # Only backup if environment is connected
    if not env.is_connected():
        log.warning("Failed to connect to Lab Instance. Cannot backup folder.")
        env.print_info()
        return

    archive_file_path = None
    backup_key = None

    try:
        # Package folder into a tar archive in the temp dir, ignoring files larger
        # than 50 MB as well as the environment directory
        archive_file_path = file_handler_utils.tar_folder(folder_to_backup, max_file_size=MAX_FILE_SIZE_IN_MB,
                                                          exclude=["**/environment/*"])
        backup_key = env.upload_file(archive_file_path, data_type=env.DataType.BACKUP, track_event=False)
    except Exception as e:
        # Failsafe - do not fail if the backup could not be created or uploaded
        log.warning("Failed to backup workspace: " + str(e))

    if archive_file_path:
        os.remove(archive_file_path)  # remove zip file after upload

    if backup_key:
        # Backup successful -> save the folder metadata
        log.info("Backup uploaded with key: " + backup_key)
        file_utils.save_dict_json(metadata_file, metadata_dict)

        # Delete older backups, keeping only the latest versions
        env.file_handler.delete_remote_file(env.file_handler.remove_version_from_key(backup_key),
                                            keep_latest_versions=KEEP_N_LATEST_BACKUPS)
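
# Hedged usage sketch for backup_folder; both paths are illustrative:
backup_folder(os.path.expanduser("~/workspace"), os.path.expanduser("~/.workspace-cache"))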
def get_file(project, unpack, key):
    """ Returns the path to the file for the given KEY (either a storage key or URL). If the file is not
    available locally, download it from the remote storage. """
    try:
        # Normalize falsy values (e.g. None) to False
        unpack = unpack or False
        from lab_client import Environment
        file_path = Environment(project=project).get_file(key, unpack=unpack)
        if file_path:
            log.info("Downloaded file to " + str(file_path))
    except Exception:
        log.exception("Failed to get file with key: " + str(key) +
                      " from project: " + str(project))
def test_local_file_handling():
    log.info("Start local file handling tests.")
    local_env = Environment(root_folder="temp")
    local_env.print_info()

    log.info("Create test resources (files & folders)")

    file_1_name = "file-1.txt"
    file_1_path = os.path.join(local_env.datasets_folder, file_1_name)
    file_1_content = "test"

    with open(file_1_path, 'w') as f:
        f.write(file_1_content)

    # get file from local
    local_file_key = "datasets/" + file_1_name
    log.info("Get local file with key " + local_file_key)
    local_file = local_env.get_file(local_file_key)
    assert local_file
    # check file content
    with open(local_file, 'r') as f:
        file_content = f.read()
        assert (file_content == file_1_content)
def restore_backup(folder_to_restore: str):
    """Restore a folder from a backup."""

    if not os.path.exists(folder_to_restore):
        os.makedirs(folder_to_restore)

    # Check if folder is empty
    if os.listdir(folder_to_restore):
        log.info("Folder " + folder_to_restore + " is not empty, will not attempt to restore backup.")
        return

    log.info("Folder " + folder_to_restore + " is empty, will try to restore backup.")

    # Initialize environment with user project and in temp directory
    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)

    # Download the latest backup and unpack it into the specified folder
    backup_key = "backups/" + os.path.basename(folder_to_restore) + ".tar"
    file_path = env.get_file(backup_key)
    if file_path:
        file_handler_utils.extract_tar(file_path=file_path, unpack_path=folder_to_restore, remove_if_exists=False)
    else:
        log.warning("Failed to get backup with key " + backup_key)
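
# Hedged usage sketch (path is illustrative); restores only if the folder is empty:
restore_backup(os.path.expanduser("~/workspace"))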
def setup_function(function):
    global env
    log.info("Initialize environment")
    test_project = "test-" + str(int(round(time.time() * 1000)))
    env = Environment(project=test_project,
                      root_folder="temp",
                      lab_endpoint=LAB_ENDPOINT,
                      lab_api_token=auth_token)
    env.print_info()

    log.info("Create test project: " + test_project)
    env.lab_handler.lab_api.create_project(LabProjectConfig(name=test_project))
    log.info("Project created.")

    # Re-initialize the environment so that it is connected to the new project
    env = Environment(project=test_project,
                      root_folder="temp",
                      lab_endpoint=LAB_ENDPOINT,
                      lab_api_token=auth_token)
    env.print_info()
Example #10
import logging
import os
import sys
import threading
from subprocess import call

from unified_model import model_handler
from unified_model.server.api_server import run

from lab_client import Environment

logging.basicConfig(stream=sys.stdout, format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger(__name__)

log.info("Starting model service...")

env = Environment()
env.print_info()

default_model_key = os.getenv("MODEL_KEY", "/default_model")

def key_resolver(model_key):
    if os.path.exists(model_key):
        # model key == path
        return model_key
    else:
        # resolve the key via the lab environment
        # TODO: also unpack if possible (unpack=True)?
        return env.get_file(model_key)

# set key resolver
model_handler.key_resolver = key_resolver
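
# Hedged sketch of the resolver's behavior (keys are illustrative):
# an existing local path is returned unchanged; anything else is treated as a
# Lab storage key and resolved via env.get_file(...).
print(key_resolver("/default_model"))        # returned as-is if the path exists
print(key_resolver("datasets/model.zip"))    # downloaded from remote storage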
Example #11
if args.mode == "clean":
    if WORKSPACE_STORAGE_LIMIT is not None:
        try:
            # Wait for random time (up to 1 hour) so that not all workspaces check at the same time
            time.sleep(random.randint(0, 60) * 60)
    
            log.info("Run storage cleanup check.")
            max_disk_storage_gb = int(WORKSPACE_STORAGE_LIMIT)
            inactive_days = jupyterdiskcheck_plugin.get_inactive_days()
            size_in_gb = jupyterdiskcheck_plugin.get_workspace_size()

            if inactive_days <= 1:
                # Backup workspace metadata if user is active -> used in Lab for tracking of activity
                try:
                    from lab_client import Environment
                    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)
                    # Only backup if environment is connected
                    if not env.is_connected():
                        log.warning("Failed to connect to Lab Instance. Cannot upload metadata backup file.")
                        env.print_info()
                    else:
                        env.upload_file(os.path.join(WORKSPACE_CONFIG_FOLDER, "metadata.json"), data_type=env.DataType.BACKUP, track_event=False)
                except Exception as e:
                    # Failsafe - do not fail the cleanup run if the metadata backup fails
                    print("Failed to backup workspace metadata.")
                    print(e)

            # Only run the inactivity-based cleanup if the workspace uses more than the
            # configured share of the limit (e.g. 50%) and the user has been inactive too long
            if size_in_gb and size_in_gb > (max_disk_storage_gb * STORAGE_CLEANUP_THRESHOLD) \
                    and inactive_days and inactive_days > LAST_USER_ACTIVITY:
                # Automatic cleanup