def test_log_exception(self, mock_config_file):
        """Test that the logging of exceptions appear as expected. """
        with patch('gtmcore.logging.LMLogger.CONFIG_INSTALLED_LOCATION',
                   new_callable=PropertyMock,
                   return_value=mock_config_file):
            # NOTE!! This should be the standard way to load the logger in each package.
            # Do NOT use LMLogger().logger because you will lose the stack context and knowledge
            # of source package, method, and line.
            lmlog = LMLogger()
            logger = LMLogger.get_logger()

        try:
            1 / 0
        except ZeroDivisionError as e:
            logger.exception(e)

        with open(lmlog.log_file, 'rt') as test_file:
            data = test_file.readlines()
            for d in data:
                # Make sure it's JSON parseable
                assert json.loads(d)
            # Note: sometimes 'exc_info' is None, so we do (... or "") to keep the 'in' check valid.
            assert any([
                'Traceback (most recent call last)' in (d.get('exc_info')
                                                        or "")
                for d in [json.loads(x) for x in data if x]
            ])
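The NOTE above is about preserving call-site context. A minimal sketch of that recommended pattern, assuming gtmcore is importable from the calling package (do_work is a hypothetical function used only for illustration):

# Recommended pattern (sketch): obtain the logger once at module level so
# 'filename', 'funcName', and 'lineno' in the JSON records point at the
# real call site rather than at the LMLogger wrapper.
from gtmcore.logging import LMLogger

logger = LMLogger.get_logger()


def do_work() -> None:
    logger.info("doing work")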
Example #2

    def test_log_exception_using_error_to_log(self, mock_config_file):
        """Test that exceptions logged via logger.error(e, exc_info=True) appear as expected. """
        with patch('gtmcore.logging.LMLogger.CONFIG_INSTALLED_LOCATION',
                   new_callable=PropertyMock,
                   return_value=mock_config_file):
            lmlog = LMLogger()
            logger = LMLogger.get_logger()

        try:
            assert False
        except AssertionError as e:
            # Note: using exc_info=True additionally records the exception traceback in the log entry.
            logger.error(e, exc_info=True)

        with open(lmlog.log_file, 'rt') as test_file:
            data = test_file.readlines()

            for d in data:
                # Make sure each line is JSON parseable and non-empty
                n = json.loads(d)
                assert n, "Loaded JSON entry must not be None"

            assert any([
                'AssertionError' in (d['exc_info'] or "")
                for d in [json.loads(x) for x in data if x]
            ])
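Both tests above parse the log file as JSON lines. A small helper capturing that pattern (a sketch; load_log_records is not part of gtmcore):

import json
from typing import Any, Dict, List


def load_log_records(log_file: str) -> List[Dict[str, Any]]:
    # One JSON object per line, skipping blank lines, exactly as the
    # assertions above expect.
    with open(log_file, 'rt') as fh:
        return [json.loads(line) for line in fh if line.strip()]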
Example #3
def publish_repository(repository: Repository,
                       username: str,
                       access_token: str,
                       remote: Optional[str] = None,
                       public: bool = False,
                       id_token: str = None) -> None:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting publish_repository({str(repository)})")

    def update_feedback(msg: str,
                        has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None):
        """Method to update the job's metadata and provide feedback to the UI"""
        current_job = get_current_job()
        if not current_job:
            return
        if has_failures:
            current_job.meta['has_failures'] = has_failures
        if failure_detail:
            current_job.meta['failure_detail'] = failure_detail
        if percent_complete:
            current_job.meta['percent_complete'] = percent_complete

        current_job.meta['feedback'] = msg
        current_job.save_meta()

    try:
        update_feedback("Publish task in queue")
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository)  # type: ignore
            wf.publish(username=username,
                       access_token=access_token,
                       remote=remote or "origin",
                       public=public,
                       feedback_callback=update_feedback,
                       id_token=id_token)
    except IOError:
        raise
    except Exception as e:
        logger.exception(e)
        raise Exception("Could not publish - try to log out and log in again.")
Example #4
def sync_repository(repository: Repository, username: str, override: MergeOverride,
                    remote: str = "origin", access_token: str = None,
                    pull_only: bool = False, id_token: str = None) -> int:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting sync_repository({str(repository)})")

    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    try:
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository) # type: ignore
            cnt = wf.sync(username=username, remote=remote, override=override,
                          feedback_callback=update_meta, access_token=access_token,
                          id_token=id_token, pull_only=pull_only)
        logger.info(f"(Job {p} Completed sync_repository with cnt={cnt}")
        return cnt
    except Exception as e:
        logger.exception(f"(Job {p}) Error on sync_repository: {e}")
        raise
Example #5
def publish_repository(repository: Repository, username: str, access_token: str,
                       remote: Optional[str] = None, public: bool = False, id_token: str = None) -> None:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting publish_repository({str(repository)})")

    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    try:
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository) # type: ignore
            wf.publish(username=username, access_token=access_token, remote=remote or "origin",
                       public=public, feedback_callback=update_meta, id_token=id_token)

    except Exception as e:
        logger.exception(f"(Job {p}) Error on publish_repository: {e}")
        raise
Example #6
def start_and_run_activity_monitor(module_name, class_name, user, owner, labbook_name, monitor_key, author_name,
                                   author_email, session_metadata):
    """Run method to run the activity monitor. It is a long running job.

        Args:


    Returns:
        0 to indicate no failure
    """
    logger = LMLogger.get_logger()
    logger.info("Starting Activity Monitor `{}` in PID {}".format(class_name, os.getpid()))

    try:
        # Import the monitor class
        m = importlib.import_module(module_name)
        # get the class
        monitor_cls = getattr(m, class_name)

        # Instantiate monitor class
        monitor = monitor_cls(user, owner, labbook_name, monitor_key,
                              author_name=author_name, author_email=author_email)

        # Start the monitor
        monitor.start(session_metadata)

        return 0
    except Exception as e:
        logger.error("Error on start_and_run_activity_monitor in pid {}: {}".format(os.getpid(), e))
        raise e
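The importlib/getattr pair above is a standard dynamic-loading pattern. A generic sketch of the same technique (load_class is a hypothetical helper, not part of gtmcore):

import importlib


def load_class(module_name: str, class_name: str) -> type:
    # Import the module by dotted path, then pull the class off it,
    # mirroring the two steps in start_and_run_activity_monitor above.
    module = importlib.import_module(module_name)
    return getattr(module, class_name)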
Example #7

    def test_init(self, mock_config_file):
        """Test loading a config file explicitly"""
        lmlog = LMLogger(mock_config_file)

        assert type(lmlog) is LMLogger
        assert lmlog.config_file is mock_config_file
        assert type(lmlog.logger) is logging.Logger
Example #8
def start_labbook_container(root: str, config_path: str, username: Optional[str] = None,
                            override_image_id: Optional[str] = None) -> str:
    """Return the ID of the LabBook Docker container ID.

    Args:
        root: Root directory of labbook
        config_path: Path to config file (labbook.client_config.config_file)
        username: Username of active user
        override_image_id: Force use of this Docker image (do not infer)

    Returns:
        Docker container ID
    """

    logger = LMLogger.get_logger()
    logger.info(f"Starting start_labbook_container(root={root}, config_path={config_path}, username={username}, "
                f"override_image_id={override_image_id}) in pid {os.getpid()}")

    try:
        c_id = start_container(labbook_root=root, config_path=config_path,
                               override_image_id=override_image_id, username=username)
        logger.info(f"Completed start_labbook_container in pid {os.getpid()}: {c_id}")
        return c_id
    except Exception as e:
        logger.error("Error on launch_docker_container in pid {}: {}".format(os.getpid(), e))
        raise
Example #9

    def test_log(self, mock_config_file):
        """Test logging"""
        with patch('gtmcore.logging.LMLogger.CONFIG_INSTALLED_LOCATION',
                   new_callable=PropertyMock,
                   return_value=mock_config_file):
            lmlog = LMLogger()

        assert type(lmlog) is LMLogger
        assert lmlog.config_file is mock_config_file
        assert type(lmlog.logger) is logging.Logger

        logger = lmlog.logger

        logger.debug("##DE_BUG##")
        logger.info("##IN_FO##")
        logger.warning("##WA_RN##")
        logger.error("##ER_ROR##")

        with open(lmlog.log_file, 'rt') as test_file:
            data = [json.loads(s) for s in test_file.readlines()]

            assert all([d['filename'] == 'test_logging.py' for d in data])

            assert "##IN_FO##" in data[0]['message']
            assert "INFO" == data[0]['levelname']
            assert "##WA_RN##" in data[1]['message']
            assert "WARNING" == data[1]['levelname']
            assert "##ER_ROR##" in data[2]['message']
            assert "ERROR" == data[2]['levelname']
Example #10
def run_dev_env_monitor(dev_env_name, key) -> int:
    """Run method to check if new Activity Monitors for a given dev env need to be started/stopped

        Args:
            dev_env_name(str): Name of the dev env to monitor
            key(str): The unique string used as the key in redis to track this DevEnvMonitor instance

    Returns:
        0 to indicate no failure
    """

    logger = LMLogger.get_logger()
    logger.debug(
        "Checking Dev Env `{}` for activity monitors in PID {}".format(
            dev_env_name, os.getpid()))

    try:
        demm = DevEnvMonitorManager()
        dev_env = demm.get_monitor_instance(dev_env_name)
        if not dev_env:
            raise ValueError('dev_env is None')
        dev_env.run(key)
        return 0
    except Exception as e:
        logger.error("Error on run_dev_env_monitor in pid {}: {}".format(
            os.getpid(), e))
        raise e
Example #11
def import_labbook_from_remote(remote_url: str,
                               username: str,
                               config_file: str = None) -> str:
    """Return the root directory of the newly imported Project"""
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(
        f"(Job {p}) Starting import_labbook_from_remote({remote_url}, {username})"
    )

    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    try:
        toks = remote_url.split("/")
        if len(toks) > 1:
            proj_path = f'{toks[-2]}/{toks[-1].replace(".git", "")}'
        else:
            proj_path = remote_url
        update_meta(f"Importing Project from {proj_path!r}...")
        wf = LabbookWorkflow.import_from_remote(remote_url, username,
                                                config_file)
        update_meta(f"Imported Project {wf.labbook.name}!")
        return wf.labbook.root_dir
    except Exception as e:
        update_meta(f"Could not import Project from {remote_url}.")
        logger.exception(f"(Job {p}) Error on import_labbook_from_remote: {e}")
        raise
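A worked example of the proj_path derivation above (the URL is hypothetical):

remote_url = "https://repo.example.com/alice/my-project.git"   # hypothetical remote
toks = remote_url.split("/")
proj_path = f'{toks[-2]}/{toks[-1].replace(".git", "")}'
assert proj_path == "alice/my-project"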
Example #12

    def test_init_load_from_install(self, mock_config_file):
        """Test loading the default file from the installed location"""
        with patch('gtmcore.logging.LMLogger.CONFIG_INSTALLED_LOCATION',
                   new_callable=PropertyMock,
                   return_value=mock_config_file):
            lmlog = LMLogger()

        assert type(lmlog) is LMLogger
        assert lmlog.config_file is mock_config_file
        assert type(lmlog.logger) is logging.Logger
Example #13
def clean_dataset_file_cache(logged_in_username: str,
                             dataset_owner: str,
                             dataset_name: str,
                             cache_location: str,
                             config_file: str = None) -> None:
    """Method to import a dataset from a zip file

    Args:
        logged_in_username: username for the currently logged in user
        dataset_owner: Owner of the labbook if this dataset is linked
        dataset_name: Name of the labbook if this dataset is linked
        cache_location: Absolute path to the file cache (inside the container) for this dataset
        config_file:

    Returns:
        None
    """
    logger = LMLogger.get_logger()

    p = os.getpid()
    try:
        logger.info(
            f"(Job {p}) Starting clean_dataset_file_cache(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        im = InventoryManager(config_file=config_file)

        # Check for dataset
        try:
            im.load_dataset(logged_in_username, dataset_owner, dataset_name)
            logger.info(
                f"{logged_in_username}/{dataset_owner}/{dataset_name} still exists. Skipping file cache clean."
            )
            return
        except InventoryException:
            # Dataset not found, move along
            pass

        # Check for submodule references
        for lb in im.list_labbooks(logged_in_username):
            for ds in im.get_linked_datasets(lb):
                if ds.namespace == dataset_owner and ds.name == dataset_name:
                    logger.info(
                        f"{logged_in_username}/{dataset_owner}/{dataset_name} still referenced by {str(lb)}."
                        f" Skipping file cache clean.")
                    return

        # If you get here the dataset no longer exists and is not used by any projects, clear files
        shutil.rmtree(cache_location)

    except Exception as err:
        logger.error(f"(Job {p}) Error in clean_dataset_file_cache job")
        logger.exception(err)
        raise
Example #14
def update_unmanaged_dataset_from_remote(logged_in_username: str,
                                         access_token: str, id_token: str,
                                         dataset_owner: str,
                                         dataset_name: str) -> None:
    """Method to update/populate an unmanaged dataset from its remote automatically

    Args:
        logged_in_username: username for the currently logged in user
        access_token: bearer token
        id_token: identity token
        dataset_owner: Owner of the dataset containing the files to download
        dataset_name: Name of the dataset containing the files to download

    Returns:
        None
    """
    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    logger = LMLogger.get_logger()

    try:
        p = os.getpid()
        logger.info(
            f"(Job {p}) Starting update_unmanaged_dataset_from_remote(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        im = InventoryManager()
        ds = im.load_dataset(logged_in_username, dataset_owner, dataset_name)

        ds.namespace = dataset_owner
        ds.backend.set_default_configuration(logged_in_username, access_token,
                                             id_token)

        if not isinstance(ds.backend, UnmanagedStorageBackend):
            raise ValueError("Can only auto-update unmanaged dataset types")

        if not ds.backend.can_update_from_remote:
            raise ValueError(
                "Storage backend cannot update automatically from remote.")

        ds.backend.update_from_remote(ds, update_meta)

    except Exception as err:
        logger.exception(err)
        raise
Example #15
def sync_repository(repository: Repository,
                    username: str,
                    override: MergeOverride,
                    remote: str = "origin",
                    access_token: str = None,
                    pull_only: bool = False,
                    id_token: str = None) -> int:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting sync_repository({str(repository)})")

    def update_feedback(msg: str,
                        has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None):
        """Method to update the job's metadata and provide feedback to the UI"""
        current_job = get_current_job()
        if not current_job:
            return
        if has_failures:
            current_job.meta['has_failures'] = has_failures
        if failure_detail:
            current_job.meta['failure_detail'] = failure_detail
        if percent_complete:
            current_job.meta['percent_complete'] = percent_complete

        current_job.meta['feedback'] = msg
        current_job.save_meta()

    try:
        update_feedback("Sync task in queue")
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository)  # type: ignore
            cnt = wf.sync(username=username,
                          remote=remote,
                          override=override,
                          feedback_callback=update_feedback,
                          access_token=access_token,
                          id_token=id_token,
                          pull_only=pull_only)
        logger.info(f"(Job {p} Completed sync_repository with cnt={cnt}")
        return cnt
    except MergeConflict as me:
        logger.exception(f"(Job {p}) Merge conflict: {me}")
        raise
    except IOError:
        raise
    except Exception as e:
        logger.exception(e)
        raise Exception("Could not sync - try to log out and log in again.")
Example #16

    def test_init_load_from_package(self):
        """Test loading the default file from the package"""
        lmlog = LMLogger()

        assert type(lmlog) is LMLogger

        if os.path.exists(LMLogger.CONFIG_INSTALLED_LOCATION):
            assert lmlog.config_file.rsplit("/", 1)[1] == "logging.json"
        else:
            assert lmlog.config_file.rsplit("/",
                                            1)[1] == "logging.json.default"

        assert type(lmlog.logger) is logging.Logger
Example #17
def export_dataset_as_zip(dataset_path: str, ds_export_directory: str) -> str:
    """Return path to archive file of exported dataset. """
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting export_dataset_as_zip({dataset_path})")

    try:
        ds = InventoryManager().load_dataset_from_directory(dataset_path)
        with ds.lock():
            path = ZipExporter.export_dataset(ds.root_dir, ds_export_directory)
        return path
    except Exception as e:
        logger.exception(f"(Job {p}) Error on export_dataset_as_zip: {e}")
        raise
Example #18
def export_labbook_as_zip(labbook_path: str, lb_export_directory: str) -> str:
    """Return path to archive file of exported labbook. """
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting export_labbook_as_zip({labbook_path})")

    try:
        lb = InventoryManager().load_labbook_from_directory(labbook_path)
        with lb.lock():
            path = ZipExporter.export_labbook(lb.root_dir, lb_export_directory)
        return path
    except Exception as e:
        logger.exception(f"(Job {p}) Error on export_labbook_as_zip: {e}")
        raise
Example #19
def build_labbook_image(path: str,
                        username: str,
                        tag: Optional[str] = None,
                        nocache: bool = False) -> str:
    """Return a docker image ID of given LabBook.

    Args:
        path: Pass-through arg to labbook root.
        username: Username of active user.
        tag: Pass-through arg to tag of docker image.
        nocache(bool): Pass-through arg to docker build.

    Returns:
        Docker image ID
    """

    logger = LMLogger.get_logger()
    logger.info(
        f"Starting build_labbook_image({path}, {username}, {tag}, {nocache}) in pid {os.getpid()}"
    )

    try:
        job = get_current_job()
        if job:
            job.meta['pid'] = os.getpid()
            job.save_meta()

        def save_metadata_callback(line: str) -> None:
            try:
                if not line:
                    return
                job.meta['feedback'] = (job.meta.get('feedback')
                                        or '') + line + '\n'
                job.save_meta()
            except Exception as e:
                logger.error(e)

        save_metadata_callback("Build task in queue")
        image_id = build_image(path,
                               override_image_tag=tag,
                               nocache=nocache,
                               username=username,
                               feedback_callback=save_metadata_callback)

        logger.info(
            f"Completed build_labbook_image in pid {os.getpid()}: {image_id}")
        return image_id
    except Exception as e:
        logger.error(f"Error on build_labbook_image in pid {os.getpid()}: {e}")
        raise
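build_labbook_image streams build output into job.meta['feedback']. A hedged client-side polling sketch, assuming the job object comes from RQ (wait_for_build is hypothetical):

import time

from rq.job import Job


def wait_for_build(job: Job, poll_seconds: float = 1.0) -> None:
    # Poll until the job finishes, echoing the streamed feedback that
    # save_metadata_callback() appends inside the worker.
    while job.get_status() not in ('finished', 'failed'):
        job.refresh()
        print(job.meta.get('feedback', ''), end='')
        time.sleep(poll_seconds)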
Example #20
def test_sleep(n):
    """Used only for testing -- example method with argument. """
    logger = LMLogger.get_logger()
    logger.info("Starting test_sleep({}) in pid {}".format(n, os.getpid()))

    try:
        job = get_current_job()
        job.meta['sample'] = 'test_sleep metadata'
        job.save_meta()

        time.sleep(n)
        logger.info("Completed test_sleep in pid {}".format(os.getpid()))
        return 0
    except Exception as e:
        logger.error("Error on test_sleep in pid {}: {}".format(os.getpid(), e))
        raise
Example #21

    def test_load_logger_by_name(self, mock_config_file):
        """Test loading the logger by name rather than by LMLogger.logger. """
        with patch('gtmcore.logging.LMLogger.CONFIG_INSTALLED_LOCATION',
                   new_callable=PropertyMock,
                   return_value=mock_config_file):
            lmlog = LMLogger()

        logger = logging.getLogger("labmanager")
        logger.warning('test_load_logger_by_name')

        with open(lmlog.log_file, 'rt') as test_file:
            data = test_file.readlines()
            for d in data:
                # Make sure it's JSON parseable
                assert json.loads(d)

            assert any(['test_load_logger_by_name' in d for d in data])
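test_load_logger_by_name shows that records written through logging.getLogger("labmanager") land in the same JSON log file. A short sketch of the two ways to obtain the logger (the equivalence is inferred from this test, not stated elsewhere):

import logging

from gtmcore.logging import LMLogger

log_via_helper = LMLogger.get_logger()
log_via_name = logging.getLogger("labmanager")   # same handlers/log file, per the test above

log_via_name.warning("written by the named logger")
log_via_helper.warning("written by the helper logger")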
Example #22
def get_logged_in_username() -> str:
    """A Method to get the current logged in user's username


    Returns:
        str
    """
    user = get_logged_in_user()

    if not user:
        logger = LMLogger()
        logger.logger.error(
            "Failed to load a user identity from request context.")
        raise ValueError(
            "Failed to load a user identity from request context.")

    return user.username
Example #23
def get_logged_in_author():
    """A Method to get the current logged in user's GitAuthor instance


    Returns:
        GitAuthor
    """
    user = get_logged_in_user()

    if not user:
        logger = LMLogger()
        logger.logger.error(
            "Failed to load a user identity from request context.")
        raise ValueError(
            "Failed to load a user identity from request context.")

    # Create a GitAuthor instance if possible

    return GitAuthor(name=user.username, email=user.email)
Example #24
def hash_dataset_files(logged_in_username: str,
                       dataset_owner: str,
                       dataset_name: str,
                       file_list: List,
                       config_file: str = None) -> None:
    """

    Args:
        logged_in_username: username for the currently logged in user
        dataset_owner: Owner of the labbook if this dataset is linked
        dataset_name: Name of the labbook if this dataset is linked
        file_list: List of files to be hashed
        config_file: Optional config file to use

    Returns:
        None
    """
    logger = LMLogger.get_logger()

    p = os.getpid()
    try:
        logger.info(
            f"(Job {p}) Starting hash_dataset_files(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        ds = InventoryManager(config_file=config_file).load_dataset(
            logged_in_username, dataset_owner, dataset_name)
        manifest = Manifest(ds, logged_in_username)

        hash_result, fast_hash_result = manifest.hash_files(file_list)

        job = get_current_job()
        if job:
            job.meta['hash_result'] = ",".join(
                ['None' if v is None else v for v in hash_result])
            job.meta['fast_hash_result'] = ",".join(
                ['None' if v is None else v for v in fast_hash_result])
            job.save_meta()

    except Exception as err:
        logger.error(f"(Job {p}) Error in clean_dataset_file_cache job")
        logger.exception(err)
        raise
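hash_dataset_files stores its results in job.meta as comma-joined strings with literal 'None' placeholders. A consumer-side decoding sketch (decode_hash_results is hypothetical):

from typing import List, Optional


def decode_hash_results(joined: str) -> List[Optional[str]]:
    # Inverse of the ",".join(['None' if v is None else v ...]) encoding above.
    return [None if v == 'None' else v for v in joined.split(',')]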
Example #25
def import_dataset_from_zip(archive_path: str,
                            username: str,
                            owner: str,
                            config_file: Optional[str] = None) -> str:
    """Method to import a dataset from a zip file

    Args:
        archive_path(str): Path to the uploaded zip
        username(str): Username
        owner(str): Owner username
        config_file(str): Optional path to a labmanager config file

    Returns:
        str: directory path of the imported dataset
    """
    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        job.meta['feedback'] = msg
        job.save_meta()

    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(
        f"(Job {p}) Starting import_dataset_from_zip(archive_path={archive_path},"
        f"username={username}, owner={owner}, config_file={config_file})")

    try:
        lb = ZipExporter.import_dataset(archive_path,
                                        username,
                                        owner,
                                        config_file=config_file,
                                        update_meta=update_meta)
        return lb.root_dir
    except Exception as e:
        logger.exception(
            f"(Job {p}) Error on import_dataset_from_zip({archive_path}): {e}")
        raise
    finally:
        if os.path.exists(archive_path):
            os.remove(archive_path)
Example #26
def stop_labbook_container(container_id: str):
    """Return a dictionary of metadata pertaining to the given task's Redis key.

    TODO - Take labbook as argument rather than image tag.

    Args:
        image_tag(str): Container to stop

    Returns:
        0 to indicate no failure
    """

    logger = LMLogger.get_logger()
    logger.info(f"Starting stop_labbook_container({container_id}) in pid {os.getpid()}")

    try:
        stop_container(container_id)
        return 0
    except Exception as e:
        logger.error("Error on stop_labbook_container in pid {}: {}".format(os.getpid(), e))
        raise
Example #27
def test_incr(path):
    logger = LMLogger.get_logger()
    logger.info("Starting test_incr({}) in pid {}".format(path, os.getpid()))

    try:
        amt = 1
        if not os.path.exists(path):
            logger.info("Creating {}".format(path))
            with open(path, 'w') as fp:
                json.dump({'amt': amt}, fp)
        else:
            logger.info("Loading {}".format(path))
            with open(path, 'r') as fp:
                amt_dict = json.load(fp)
            logger.info("Amt = {}")
            with open(path, 'w') as fp:
                amt_dict['amt'] = amt_dict['amt'] + 1
                json.dump(amt_dict, fp)
            logger.info("Set amt = {} in {}".format(amt_dict['amt'], path))
    except Exception as e:
        logger.error("Error on test_incr in pid {}: {}".format(os.getpid(), e))
        raise
Example #28
import uuid

import graphene
from confhttpproxy import ProxyRouter

from gtmcore.inventory.inventory import InventoryManager
from gtmcore.labbook.labbook import LabBook
from gtmcore.exceptions import GigantumException
from gtmcore.container.container import ContainerOperations
from gtmcore.mitmproxy.mitmproxy import MITMProxyOperations
from gtmcore.container.jupyter import check_jupyter_reachable, start_jupyter
from gtmcore.container.rserver import start_rserver
from gtmcore.logging import LMLogger
from gtmcore.activity.services import start_labbook_monitor

from lmsrvcore.auth.user import get_logged_in_username, get_logged_in_author

logger = LMLogger.get_logger()


def unique_id() -> str:
    """This is used to, e.g., identify a specific running labbook.

    This allows us to link things like activity monitors, etc.
    It can safely be improved or changed, as consumers should only expect some "random" string."""
    return uuid.uuid4().hex[:10]


class StartDevTool(graphene.relay.ClientIDMutation):
    class Input:
        owner = graphene.String(required=True)
        labbook_name = graphene.String(required=True)
        dev_tool = graphene.String(required=True)
Example #29
def complete_dataset_upload_transaction(logged_in_username: str,
                                        logged_in_email: str,
                                        dataset_owner: str,
                                        dataset_name: str,
                                        dispatcher,
                                        config_file: str = None) -> None:
    """Method to import a dataset from a zip file

    Args:
        logged_in_username: username for the currently logged in user
        logged_in_email: email for the currently logged in user
        dataset_owner: Owner of the labbook if this dataset is linked
        dataset_name: Name of the labbook if this dataset is linked
        dispatcher: Reference to the dispatcher CLASS
        config_file: config file (used for test mocking)

    Returns:
        None
    """
    logger = LMLogger.get_logger()
    dispatcher_obj = dispatcher()

    def update_feedback(msg: str,
                        has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None):
        """Method to update the job's metadata and provide feedback to the UI"""
        current_job = get_current_job()
        if not current_job:
            return
        if has_failures:
            current_job.meta['has_failures'] = has_failures
        if failure_detail:
            current_job.meta['failure_detail'] = failure_detail
        if percent_complete:
            current_job.meta['percent_complete'] = percent_complete

        current_job.meta['feedback'] = msg
        current_job.save_meta()

    def schedule_bg_hash_job():
        """Method to check if a bg job should get scheduled and do so"""
        num_cores = manifest.get_num_hashing_cpus()
        if sum([x.is_running for x in job_list]) < num_cores:
            for j in job_list:
                if j.is_failed is True and j.failure_count < 3:
                    # Re-schedule failed job
                    job_kwargs['file_list'] = j.file_list
                    job_key = dispatcher_obj.dispatch_task(
                        hash_dataset_files,
                        kwargs=job_kwargs,
                        metadata=job_metadata)
                    j.job_key = job_key
                    update_feedback(
                        f"Restarted failed file hashing job. Re-processing"
                        f" {format_size(j.total_bytes)}...")
                    logger.info(
                        f"(Job {p}) Restarted file hash job for"
                        f" {logged_in_username}/{dataset_owner}/{dataset_name}"
                    )
                    break

                if j.is_complete is False and j.is_running is False:
                    # Schedule new job
                    job_kwargs['file_list'] = j.file_list
                    job_key = dispatcher_obj.dispatch_task(
                        hash_dataset_files,
                        kwargs=job_kwargs,
                        metadata=job_metadata)
                    j.job_key = job_key
                    logger.info(
                        f"(Job {p}) Scheduled file hash job for"
                        f" {logged_in_username}/{dataset_owner}/{dataset_name}"
                    )
                    break

    p = os.getpid()
    try:
        logger.info(
            f"(Job {p}) Starting complete_dataset_upload_transaction(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        author = GitAuthor(name=logged_in_username, email=logged_in_email)
        ds = InventoryManager(config_file=config_file).load_dataset(
            logged_in_username, dataset_owner, dataset_name, author=author)
        manifest = Manifest(ds, logged_in_username)

        with ds.lock():
            # Detect changes
            status = manifest.status()

            # Collect filenames that need to be hashed
            filenames = copy.deepcopy(status.modified)
            filenames.extend(status.created)

            # If there are new/updated files, spread work across cores while providing reasonable feedback
            if filenames:
                job_list = generate_bg_hash_job_list(filenames, manifest,
                                                     dispatcher_obj)
                total_bytes = sum([x.total_bytes for x in job_list])

                job_kwargs = {
                    'logged_in_username': logged_in_username,
                    'dataset_owner': dataset_owner,
                    'dataset_name': dataset_name,
                    'file_list': list(),
                    'config_file': config_file,
                }
                job_metadata = {
                    'dataset':
                    f"{logged_in_username}|{dataset_owner}|{dataset_name}",
                    'method': 'hash_dataset_files'
                }

                update_feedback(
                    f"Please wait while file contents are analyzed. "
                    f"Processing {format_size(total_bytes)}...",
                    has_failures=False)
                logger.info(
                    f"(Job {p}) Starting file hash processing for"
                    f" {logged_in_username}/{dataset_owner}/{dataset_name} with {len(job_list)} jobs"
                )

                while True:
                    # Check if you need to schedule jobs and schedule up to 1 job per iteration
                    schedule_bg_hash_job()

                    # Refresh all job statuses and update status feedback
                    completed_job_status = [
                        x.refresh_status() for x in job_list
                    ]
                    completed_bytes = sum([
                        s.total_bytes
                        for s, c in zip(job_list, completed_job_status)
                        if c is True
                    ])
                    update_feedback(
                        f"Please wait while file contents are analyzed. "
                        f"{format_size(completed_bytes)} of {format_size(total_bytes)} complete...",
                        percent_complete=(float(completed_bytes) /
                                          float(total_bytes)) * 100)

                    # Check if you are done
                    completed_or_failed = sum([(x.is_complete
                                                or (x.failure_count >= 3))
                                               for x in job_list])
                    if completed_or_failed == len(job_list):
                        break

                    # Update once per second
                    time.sleep(1)

                # Manually complete update process for updated/created files
                failed_files = list()
                for job in job_list:
                    if job.is_complete:
                        for f, h, fh in zip(job.file_list,
                                            job.get_hash_result(),
                                            job.get_fast_hash_result()):
                            if not fh or not h:
                                failed_files.append(f)
                                continue

                            _, file_bytes, mtime = fh.split("||")
                            manifest._manifest_io.add_or_update(
                                f, h, mtime, file_bytes)
                    else:
                        failed_files.extend(job.file_list)

                # Message for hard failures
                if failed_files:
                    detail_msg = f"The following files failed to hash. Try re-uploading the files again:\n"
                    detail_file_list = " \n".join(failed_files)
                    detail_msg = f"{detail_msg}{detail_file_list}"
                    update_feedback(
                        f"An error occurred while processing some files. Check details and re-upload.",
                        has_failures=True,
                        failure_detail=detail_msg)

            if status.deleted:
                manifest.hasher.delete_fast_hashes(status.deleted)
                for relative_path in status.deleted:
                    manifest._manifest_io.remove(relative_path)

            manifest._manifest_io.persist()

            # Complete sweep operation
            manifest.sweep_all_changes(status=status, upload=True)

    except Exception as err:
        logger.error(f"(Job {p}) Error in clean_dataset_file_cache job")
        logger.exception(err)
        raise
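complete_dataset_upload_transaction drives the objects returned by generate_bg_hash_job_list() purely through a handful of attributes. The sketch below records that inferred interface; it is not the real class definition:

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class InferredHashJob:
    # Attributes read or written by the scheduling loop above.
    # get_hash_result() / get_fast_hash_result() are also used once jobs complete.
    file_list: List[str] = field(default_factory=list)
    total_bytes: int = 0
    job_key: Optional[str] = None
    failure_count: int = 0
    is_running: bool = False
    is_complete: bool = False
    is_failed: bool = False

    def refresh_status(self) -> bool:
        """Refresh state from the dispatcher; the loop above treats the
        return value as 'this job has completed'."""
        raise NotImplementedError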
Example #30
def verify_dataset_contents(logged_in_username: str,
                            access_token: str,
                            id_token: str,
                            dataset_owner: str,
                            dataset_name: str,
                            labbook_owner: Optional[str] = None,
                            labbook_name: Optional[str] = None) -> None:
    """Method to update/populate an unmanaged dataset from it local state

    Args:
        logged_in_username: username for the currently logged in user
        access_token: bearer token
        id_token: identity token
        dataset_owner: Owner of the dataset containing the files to download
        dataset_name: Name of the dataset containing the files to download
        labbook_owner: Owner of the labbook if this dataset is linked
        labbook_name: Name of the labbook if this dataset is linked

    Returns:
        None
    """
    job = get_current_job()

    def update_meta(msg):
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    logger = LMLogger.get_logger()

    try:
        p = os.getpid()
        logger.info(
            f"(Job {p}) Starting verify_dataset_contents(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name},"
            f"labbook_owner={labbook_owner}, labbook_name={labbook_name}")

        im = InventoryManager()
        if labbook_owner is not None and labbook_name is not None:
            # This is a linked dataset, load repo from the Project
            lb = im.load_labbook(logged_in_username, labbook_owner,
                                 labbook_name)
            dataset_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                       dataset_owner, dataset_name)
            ds = im.load_dataset_from_directory(dataset_dir)
        else:
            # this is a normal dataset. Load repo from working dir
            ds = im.load_dataset(logged_in_username, dataset_owner,
                                 dataset_name)

        ds.namespace = dataset_owner
        ds.backend.set_default_configuration(logged_in_username, access_token,
                                             id_token)

        result = ds.backend.verify_contents(ds, update_meta)
        if job:
            job.meta['modified_keys'] = result
            job.save_meta()

    except Exception as err:
        logger.exception(err)
        raise