def backup_folder(folder_to_backup: str, cache_folder: str):
    """Backup a folder to the user project.

    The folder is archived and uploaded only when its current size differs
    from the size stored in the cached metadata file
    (``<cache_folder>/<folder basename>.json``). The metadata file is
    rewritten only after a successful upload, so a failed attempt is retried
    on the next call.

    Args:
        folder_to_backup: Path of the folder that should be backed up.
        cache_folder: Folder that holds the JSON metadata file written after
            each successful backup.

    Returns:
        None. Errors raised while archiving or uploading are logged and
        swallowed (best-effort backup).
    """

    if not os.path.isdir(folder_to_backup):
        log.warning("Folder does not exist: " + folder_to_backup)
        # Nothing to archive; continuing would operate on a missing folder.
        return

    # Check if dir size has changed
    metadata_dict = {
        "folder_size": 0,
        "last_backup": 0,
        "folder": folder_to_backup,
    }

    metadata_file = os.path.join(cache_folder, os.path.basename(folder_to_backup) + ".json")
    if os.path.isfile(metadata_file):
        cached_metadata = file_utils.load_dict_json(metadata_file)
        # Merge into the defaults so an incomplete or unexpected cache file
        # cannot cause a KeyError in the size comparison below.
        if isinstance(cached_metadata, dict):
            metadata_dict.update(cached_metadata)

    current_folder_size = file_utils.folder_size(folder_to_backup)
    if metadata_dict.get("folder_size") == current_folder_size:
        log.info("No Backup since folder size has not changed.")
        return

    metadata_dict["folder_size"] = current_folder_size
    # Timestamp of this backup attempt in milliseconds since the epoch.
    metadata_dict["last_backup"] = int(round(time.time() * 1000))

    # Initialize environment with user project and in temp directory
    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)

    # Only backup if environment is connected
    if not env.is_connected():
        log.warning("Failed to connect to Lab Instance. Cannot backup folder.")
        env.print_info()
        return

    archive_file_path = None
    backup_key = None

    try:
        # Package the folder into an archive, passing the configured file size
        # limit and excluding the environment directory, then upload it.
        archive_file_path = file_handler_utils.tar_folder(
            folder_to_backup,
            max_file_size=MAX_FILE_SIZE_IN_MB,
            exclude=["**/environment/*"],
        )
        backup_key = env.upload_file(archive_file_path, data_type=env.DataType.BACKUP, track_event=False)
    except Exception as e:
        # Failsafe backup: a failed backup must not break the caller.
        log.warning("Failed to backup workspace: " + str(e))
    finally:
        # Always remove the temporary archive, whether or not the upload worked.
        if archive_file_path:
            try:
                os.remove(archive_file_path)
            except OSError as e:
                log.warning("Failed to remove backup archive: " + str(e))

    if not backup_key:
        return

    # Backup successful
    log.info("Backup uploaded: " + str(backup_key))

    # Save folder metadata so an unchanged folder is skipped next time
    file_utils.save_dict_json(metadata_file, metadata_dict)

    # Delete old backups, keep only the KEEP_N_LATEST_BACKUPS latest versions
    env.file_handler.delete_remote_file(
        env.file_handler.remove_version_from_key(backup_key),
        keep_latest_versions=KEEP_N_LATEST_BACKUPS,
    )
Example #2
0
            log.info("Run storage cleanup check.")
            max_disk_storage_gb = int(WORKSPACE_STORAGE_LIMIT)
            inactive_days = jupyterdiskcheck_plugin.get_inactive_days()
            size_in_gb = jupyterdiskcheck_plugin.get_workspace_size()

            if inactive_days <= 1:
                # Backup workspace metadata if user is active -> used in Lab for tracking of activity
                try:
                    from lab_client import Environment
                    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)
                    # Only backup if environment is connected
                    if not env.is_connected():
                        log.warning("Failed to connect to Lab Instance. Cannot upload metadata backup file.")
                        env.print_info()
                    else:
                        env.upload_file(os.path.join(WORKSPACE_CONFIG_FOLDER , "metadata.json"), data_type=env.DataType.BACKUP, track_event=False)
                except Exception as e:
                    # Failsafe backup
                    print("Failed to backup workspace metadata.")
                    print(e)
                    pass

            # only use inactive cleanup if more than 50% of actual limit
            if size_in_gb and size_in_gb > (max_disk_storage_gb * STORAGE_CLEANUP_THRESHOLD) and inactive_days and inactive_days > LAST_USER_ACTIVITY:
                # Automatic cleanup
                log.info("Automatic storage cleanup. Workspace size: " + str(round(size_in_gb)) + " GB. "
                    "Max size: " + str(max_disk_storage_gb) + " GB. Last activity: " + str(inactive_days) + " days ago.")
                try:
                    from lab_client import Environment
                    Environment().cleanup(max_file_size_mb=MAX_FILE_SIZE_MB, last_file_usage=LAST_FILE_USAGE)
                except Exception as ex: