Ejemplo n.º 1
0
class TaskController(BaseController):
    """TaskController inherits from BaseController and manages business logic associated with tasks
    within the project.

    The constructor takes no arguments and raises ProjectNotInitialized when
    `self.is_initialized` is falsy.

    NOTE(review): `home` is not a constructor parameter of this class; `self.home` is
    read by `run` and is presumably supplied by BaseController -- confirm.

    Attributes
    ----------
    environment : datmo.core.controller.environment.environment.EnvironmentController
        used to build, run and stop the environment a task executes in
    snapshot : datmo.core.controller.snapshot.SnapshotController
        used to create snapshots before and after tasks
    spinner : Spinner
        started and stopped around task creation

    Methods
    -------
    create()
        creates a Task object bound to the current model and session
    _run_helper(environment_id, options, log_filepath)
        helper for run to build the environment and run it with the appropriate parameters
    _parse_logs_for_results(logs)
        extracts "key:value" lines from the run logs as a results dictionary
    run(task_id, snapshot_dict=None, task_dict=None)
        runs the task and tracks the run, logs, inputs and outputs
    list(session_id=None, sort_key=None, sort_order=None)
        lists all tasks within the project given filters
    get(task_id)
        returns the task with the given id
    get_files(task_id, mode="r")
        returns the file objects associated with the task
    delete(task_id)
        stops and deletes the specified task from the project
    stop(task_id=None, all=False, status="STOPPED")
        stops the run of one task (or of all tasks) and updates the task status
    """

    def __init__(self):
        """Set up the helpers used by tasks and verify the project is initialized.

        Raises
        ------
        ProjectNotInitialized
            if `self.is_initialized` is falsy after the helpers have been created
        """
        super(TaskController, self).__init__()
        self.environment = EnvironmentController()
        self.snapshot = SnapshotController()
        self.spinner = Spinner()

        # Guard clause: an initialized project needs no further checks
        if self.is_initialized:
            return
        error_message = __("error", "controller.task.__init__")
        raise ProjectNotInitialized(error_message)

    def create(self):
        """Create a Task object bound to the current model and session

        The spinner is started before the task is stored and is always stopped
        afterwards, whether or not the creation succeeded.

        Returns
        -------
        Task
            object entity for Task (datmo.core.entity.task.Task)
        """
        # Permanent parameters of the new task
        task_fields = {
            "model_id": self.model.id,
            "session_id": self.current_session.id
        }

        try:
            self.spinner.start()
            return self.dal.task.create(Task(task_fields))
        finally:
            # runs before the value above is handed back to the caller
            self.spinner.stop()

    def _run_helper(self, environment_id, options, log_filepath):
        """Run environment with parameters

        Parameters
        ----------
        environment_id : str
            the environment id for definition
        options : dict
            can include the following values:

            command : list
            ports : list
                Here are some example ports used for common applications.
                   *  'jupyter notebook' - 8888
                   *  flask API - 5000
                   *  tensorboard - 6006
                An example input for the above would be ["8888:8888", "5000:5000", "6006:6006"]
                which maps the running host port (right) to that of the environment (left)
            name : str
            volumes : dict
            mem_limit : str
            workspace : str
            detach : bool
            stdin_open : bool
            tty : bool
        log_filepath : str
            absolute filepath to the log file

        Returns
        -------
        return_code : int
            system return code of the environment that was run
        run_id : str
            id of the environment run (different from environment id)
        logs : str
            output logs from the run
        """
        # Run container with options provided
        run_options = {
            "command": options.get('command', None),
            "ports": options.get('ports', None),
            "name": options.get('name', None),
            "volumes": options.get('volumes', None),
            "mem_limit": options.get('mem_limit', None),
            "gpu": options.get('gpu', False),
            "detach": options.get('detach', False),
            "stdin_open": options.get('stdin_open', False),
            "tty": options.get('tty', False),
            "api": False,
        }
        workspace = options.get('workspace', None)
        self.environment.build(environment_id, workspace)
        # Run container with environment
        return_code, run_id, logs = self.environment.run(
            environment_id, run_options, log_filepath)

        return return_code, run_id, logs

    def _parse_logs_for_results(self, logs):
        """Parse log string to extract results and return dictionary.

        The format of the log line must be "key:value", whitespace will not matter
        and if there are more than 2 items found when split on ":", it will not
        log this as a key/value result

        Note
        ----
        If the same key is found multiple times in the logs, the last occurring
        one will be the one that is saved.

        Parameters
        ----------
        logs : str
            raw string value of output logs

        Returns
        -------
        dict or None
            dictionary to represent results from task
        """
        results = {}
        for line in logs.split("\n"):
            split_line = line.split(":")
            if len(split_line) == 2:
                results[split_line[0].strip()] = split_line[1].strip()
        if results == {}:
            results = None
        return results

    def run(self, task_id, snapshot_dict=None, task_dict=None):
        """Run a task with parameters. If dictionary specified, create a new task with new run parameters.
        Snapshot objects are created before and after the task to keep track of the state. During the run,
        you can access task outputs using environment variable DATMO_TASK_DIR or `/task` which points to
        location for the task files. Create config.json, stats.json and any weights or any file such
        as graphs and visualizations within that directory for quick access

        Errors raised while the environment is being run are NOT propagated: the task is
        stored with status "FAILED" and the error message is written to its `logs`.

        Parameters
        ----------
        task_id : str
            id for the task you would like to run
        snapshot_dict : dict
            set of parameters to create a snapshot (see SnapshotController for details.
            default is None, which means dictionary with `visible` False will be added to
            hide auto-generated snapshot) NOTE: `visible` will always be False regardless
            of whether the user provides another value for `visible`.
            The dictionary passed in is copied, never modified.
        task_dict : dict
            set of parameters to characterize the task run
            (default is None, which translate to {}, see datmo.core.entity.task.Task for more details on inputs)
            The dictionary passed in is copied, never modified.

        Returns
        -------
        Task
            the Task object which completed its run with updated parameters

        Raises
        ------
        RequiredArgumentMissing
            If none of command, command_list or interactive is available for the task
        TaskRunError
            If the task already has a status, or if the task directory cannot be created
        TaskNoCommandGiven
            If no command is available and the task is not interactive
        """
        # Work on copies so the dictionaries owned by the caller are never modified
        snapshot_dict = dict(snapshot_dict) if snapshot_dict else {}
        # Auto-generated snapshots are always hidden
        snapshot_dict['visible'] = False
        task_dict = dict(task_dict) if task_dict else {}

        # Obtain Task to run
        task_obj = self.dal.task.get_by_id(task_id)

        # Ensure that at least 1 of command, command_list, or interactive is present,
        # either in task_dict or already stored on the task
        important_task_args = ["command", "command_list", "interactive"]
        if not any(
                task_dict.get(arg, getattr(task_obj, arg))
                for arg in important_task_args):
            raise RequiredArgumentMissing(
                __("error", "controller.task.run.arg",
                   " or ".join(important_task_args)))

        # A task may only be run once: any existing status means it was already started
        if task_obj.status is None:
            task_obj.status = "RUNNING"
        else:
            raise TaskRunError(
                __("error", "cli.run.run.already_running", task_obj.id))

        # Create Task directory for user during run
        task_dirpath = os.path.join(".datmo", "tasks", task_obj.id)
        try:
            _ = self.file_driver.create(task_dirpath, directory=True)
        except Exception:
            raise TaskRunError(
                __("error", "controller.task.run", task_dirpath))

        # Create the before snapshot prior to execution
        before_snapshot_dict = snapshot_dict.copy()
        before_snapshot_dict[
            'message'] = "autogenerated snapshot created before task %s is run" % task_obj.id
        before_snapshot_obj = self.snapshot.create(before_snapshot_dict)

        # Update the task with pre-execution parameters, prefer list first then look for string command
        # List command will overwrite a string command if given
        command_list = task_dict.get('command_list', task_obj.command_list)
        command = task_dict.get('command', task_obj.command)
        if command_list:
            task_dict['command'] = " ".join(command_list)
        elif command:
            task_dict['command_list'] = shlex.split(command)
        elif not task_dict.get('interactive', task_obj.interactive):
            # If it's not interactive then there is not expected task
            raise TaskNoCommandGiven()

        validate("create_task", task_dict)
        task_obj = self.dal.task.update({
            "id":
                task_obj.id,
            "before_snapshot_id":
                task_dict.get('before_snapshot_id', before_snapshot_obj.id),
            "command":
                task_dict.get('command', task_obj.command),
            "command_list":
                task_dict.get('command_list', task_obj.command_list),
            "gpu":
                task_dict.get('gpu', False),
            "mem_limit":
                task_dict.get('mem_limit', None),
            "workspace":
                task_dict.get('workspace', None),
            "interactive":
                task_dict.get('interactive', task_obj.interactive),
            "detach":
                task_dict.get('detach', task_obj.detach),
            "ports":
                task_dict.get('ports', task_obj.ports),
            "task_dirpath":
                task_dict.get('task_dirpath', task_dirpath),
            "log_filepath":
                task_dict.get('log_filepath',
                              os.path.join(task_dirpath, "task.log")),
            "start_time":
                task_dict.get('start_time', datetime.utcnow()),
            "status":
                task_obj.status
        })

        # Copy over files from the before_snapshot file collection to task dir
        file_collection_obj =  \
            self.dal.file_collection.get_by_id(before_snapshot_obj.file_collection_id)
        self.file_driver.copytree(
            os.path.join(self.home, file_collection_obj.path),
            os.path.join(self.home, task_obj.task_dirpath))

        return_code, run_id, logs = 0, None, None
        # Message describing a failed run; stays None when the run went through
        run_error = None

        try:
            # Set the parameters set in the task
            if task_obj.detach and task_obj.interactive:
                raise TaskInteractiveDetachError(
                    __("error", "controller.task.run.args.detach.interactive"))

            environment_run_options = {
                "command": task_obj.command_list,
                "ports": [] if task_obj.ports is None else task_obj.ports,
                "name": "datmo-task-" + self.model.id + "-" + task_obj.id,
                "volumes": {
                    os.path.join(self.home, task_obj.task_dirpath): {
                        'bind': '/task/',
                        'mode': 'rw'
                    },
                    self.home: {
                        'bind': '/home/',
                        'mode': 'rw'
                    }
                },
                "mem_limit": task_obj.mem_limit,
                "workspace": task_obj.workspace,
                "gpu": task_obj.gpu,
                "detach": task_obj.detach,
                "stdin_open": task_obj.interactive,
                "tty": task_obj.interactive,
                "api": False
            }
            # Run environment via the helper function
            return_code, run_id, logs =  \
                self._run_helper(before_snapshot_obj.environment_id,
                                 environment_run_options,
                                 os.path.join(self.home, task_obj.log_filepath))

        except Exception as e:
            return_code = 1
            # Only remember the message here; it is merged into the logs below.
            # str(e) is used because exceptions have no `message` attribute on Python 3.
            run_error = "Error running task: %s" % str(e)
        finally:
            # NOTE: the `return` at the end of this block also discards any exception
            # still in flight (e.g. KeyboardInterrupt), so the task is always finalized.

            # Create the after snapshot after execution is completed with new paths
            after_snapshot_dict = snapshot_dict.copy()
            after_snapshot_dict[
                'message'] = "autogenerated snapshot created after task %s is run" % task_obj.id

            # Add in absolute paths from running task directory
            absolute_task_dir_path = os.path.join(self.home,
                                                  task_obj.task_dirpath)
            absolute_paths = []
            for item in os.listdir(absolute_task_dir_path):
                path = os.path.join(absolute_task_dir_path, item)
                if os.path.isfile(path) or os.path.isdir(path):
                    absolute_paths.append(path)
            after_snapshot_dict.update({
                "paths": absolute_paths,
                "environment_id": before_snapshot_obj.environment_id,
            })
            after_snapshot_obj = self.snapshot.create(after_snapshot_dict)

            # (optional) Remove temporary task directory path
            # Update the task with post-execution parameters
            end_time = datetime.utcnow()
            duration = (end_time - task_obj.start_time).total_seconds()
            update_task_dict = {
                "id": task_obj.id,
                "after_snapshot_id": after_snapshot_obj.id,
                "status": "SUCCESS" if return_code == 0 else "FAILED",
                # "results": task_obj.results, # TODO: update during run
                "end_time": end_time,
                "duration": duration
            }
            # Results come from the environment output only, so the error line
            # appended below can never be mistaken for a "key:value" result
            if logs is not None:
                update_task_dict["results"] = self._parse_logs_for_results(
                    logs)
            if run_error is not None:
                logs = run_error if logs is None else logs + "\n" + run_error
            update_task_dict["logs"] = logs
            if run_id is not None:
                update_task_dict["run_id"] = run_id
            return self.dal.task.update(update_task_dict)

    def list(self, session_id=None, sort_key=None, sort_order=None):
        query = {}
        if session_id:
            try:
                self.dal.session.get_by_id(session_id)
            except EntityNotFound:
                raise SessionDoesNotExist(
                    __("error", "controller.task.list", session_id))
            query['session_id'] = session_id
        return self.dal.task.query(query, sort_key, sort_order)

    def get(self, task_id):
        """Get task object and return

        Parameters
        ----------
        task_id : str
            id for the task you would like to get

        Returns
        -------
        datmo.core.entity.task.Task
            core task object

        Raises
        ------
        DoesNotExist
            task does not exist
        """
        try:
            return self.dal.task.get_by_id(task_id)
        except EntityNotFound:
            raise DoesNotExist()

    def get_files(self, task_id, mode="r"):
        """Get list of file objects for task id. It will look in the following areas in the following order

        1) look in the after snapshot for file collection
        2) look in the running task file collection
        3) look in the before snapshot for file collection

        Parameters
        ----------
        task_id : str
            id for the task you would like to get file objects for
        mode : str
            file open mode
            (default is "r" to open file for read)

        Returns
        -------
        list
            list of python file objects

        Raises
        ------
        DoesNotExist
            task object does not exist
        PathDoesNotExist
            no file objects exist for the task
        """
        try:
            task_obj = self.dal.task.get_by_id(task_id)
        except EntityNotFound:
            raise DoesNotExist()
        if task_obj.after_snapshot_id:
            # perform number 1) and return file list
            return self.snapshot.get_files(
                task_obj.after_snapshot_id, mode=mode)
        elif task_obj.task_dirpath:
            # perform number 2) and return file list
            return self.file_driver.get(
                task_obj.task_dirpath, mode=mode, directory=True)
        elif task_obj.before_snapshot_id:
            # perform number 3) and return file list
            return self.snapshot.get_files(
                task_obj.before_snapshot_id, mode=mode)
        else:
            # Error because the task does not have any files associated with it
            raise PathDoesNotExist()

    def delete(self, task_id):
        if not task_id:
            raise RequiredArgumentMissing(
                __("error", "controller.task.delete.arg", "id"))
        stopped_success = self.stop(task_id)
        delete_task_success = self.dal.task.delete(task_id)
        return stopped_success and delete_task_success

    def stop(self, task_id=None, all=False, status="STOPPED"):
        """Stop and remove run for the task and update task object statuses

        Parameters
        ----------
        task_id : str, optional
            id for the task you would like to stop
        all : bool, optional
            if specified, will stop all tasks within project

        Returns
        -------
        return_code : bool
            system return code of the stop

        Raises
        ------
        RequiredArgumentMissing
        TooManyArgumentsFound
        """
        if task_id is None and all is False:
            raise RequiredArgumentMissing(
                __("error", "controller.task.stop.arg.missing", "id"))
        if task_id and all:
            raise TooManyArgumentsFound()
        if task_id:
            try:
                task_obj = self.get(task_id)
            except DoesNotExist:
                time.sleep(1)
                task_obj = self.get(task_id)
            task_match_string = "datmo-task-" + self.model.id + "-" + task_id
            # Get the environment id associated with the task
            kwargs = {'match_string': task_match_string}
            # Get the environment from the task
            before_snapshot_id = task_obj.before_snapshot_id
            after_snapshot_id = task_obj.after_snapshot_id
            if not before_snapshot_id and not after_snapshot_id:
                # TODO: remove...for now database may not be in sync. no task that has run can have NO before_snapshot_id
                time.sleep(1)
                task_obj = self.get(task_id)
            if after_snapshot_id:
                after_snapshot_obj = self.snapshot.get(after_snapshot_id)
                kwargs['environment_id'] = after_snapshot_obj.environment_id
            if not after_snapshot_id and before_snapshot_id:
                before_snapshot_obj = self.snapshot.get(before_snapshot_id)
                kwargs['environment_id'] = before_snapshot_obj.environment_id
            return_code = self.environment.stop(**kwargs)
        if all:
            return_code = self.environment.stop(all=True)
        # Set stopped task statuses to STOPPED if return success
        if return_code:
            if task_id:
                self.dal.task.update({"id": task_id, "status": status})
            if all:
                task_objs = self.dal.task.query({})
                for task_obj in task_objs:
                    self.dal.task.update({"id": task_obj.id, "status": status})

        return return_code
Ejemplo n.º 2
0
class TestEnvironmentController():
    def setup_method(self):
        """Create a fresh temporary directory plus the project and environment controllers on it."""
        # docker needs a mountable tmp directory: force /tmp everywhere except on Windows
        on_windows = platform.system() == "Windows"
        tempfile.tempdir = None if on_windows else "/tmp"
        # TEST_DATMO_DIR, when set, overrides where the temporary directory is created
        parent_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=parent_dir)
        self.project = ProjectController(self.temp_dir)
        self.environment = EnvironmentController(self.temp_dir)

    def teardown_method(self):
        """No-op teardown hook; the temporary directory made in setup_method is not removed here."""
        pass

    def test_create(self):
        """Check environment creation for every combination of script file and definition file."""
        # 0) Test create when unsupported language given
        # 1) Test create when NO file exists and NO definition path exists
        # 2) Test create when NO file exists and definition path exists
        # 3) Test create when definition path exists and given
        # 4) Test create when file exists and definition path exists
        # 5) Test create when file exists but NO definition path exists
        # 6) Test create when definition path exists and given for NEW definition filepath

        default_files = ("Dockerfile", "datmoDockerfile", "hardware_info")

        def assert_valid_environment(environment_obj,
                                     expected_files=default_files):
            # Shared checks: the entity is fully populated and its file
            # collection contains every expected file
            file_collection_obj = self.environment.dal.file_collection. \
                get_by_id(environment_obj.file_collection_id)
            file_collection_dir = self.environment.file_driver. \
                get_collection_path(file_collection_obj.filehash)

            assert environment_obj
            assert environment_obj.id
            assert environment_obj.driver_type == "docker"
            assert environment_obj.file_collection_id
            assert environment_obj.definition_filename
            assert environment_obj.hardware_info
            assert environment_obj.unique_hash == file_collection_obj.filehash
            for filename in expected_files:
                assert os.path.isfile(
                    os.path.join(file_collection_dir, filename))

        self.project.init("test3", "test description")

        # 0) Test option 0
        # The flag must be initialised, otherwise a missing exception would surface
        # as an unbound-variable error instead of a failed assertion
        failed = False
        try:
            self.environment.create({"language": "java"})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # 1) Test option 1
        failed = False
        try:
            self.environment.create({})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) Test option 2
        environment_obj_1 = self.environment.create({})
        assert_valid_environment(environment_obj_1)

        # 3) Test option 3
        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj_2 = self.environment.create(input_dict)
        assert_valid_environment(environment_obj_2)

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})
        assert_valid_environment(environment_obj_3)

        # Remove definition filepath
        os.remove(definition_filepath)

        # Options 2), 3) and 4) all describe the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # 5) Test option 5
        environment_obj_4 = self.environment.create({})
        assert_valid_environment(
            environment_obj_4,
            expected_files=("requirements.txt", ) + default_files)

        assert environment_obj_1.id != environment_obj_4.id

        # 6) Test option 6

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM cloudgear/ubuntu:14.04")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create a new environment obj
        environment_obj_5 = self.environment.create(input_dict)
        assert_valid_environment(environment_obj_5)

        assert environment_obj_5.id != environment_obj_1.id
        assert environment_obj_5.id != environment_obj_4.id

    def test_build(self):
        """Check that environments created in each supported way can be built."""
        # 1) Test build when no environment given
        # 2) Test build when definition path exists and given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        self.project.init("test5", "test description")

        # 1) an unknown environment id must be rejected
        try:
            _ = self.environment.build("does_not_exist")
        except EntityNotFound:
            raised = True
        else:
            raised = False
        assert raised

        # Write the environment definition used by options 2) to 4)
        dockerfile_path = os.path.join(self.environment.home, "Dockerfile")
        with open(dockerfile_path, "w") as dockerfile:
            dockerfile.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) definition path given explicitly
        env_from_given_path = self.environment.create({
            "definition_filepath": dockerfile_path,
        })
        assert self.environment.build(env_from_given_path.id)

        # 3) definition file present, nothing passed in
        env_from_found_path = self.environment.create({})
        assert self.environment.build(env_from_found_path.id)

        # Add a script to the project
        script_path = os.path.join(self.environment.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "w") as script:
            for script_line in script_lines:
                script.write(to_unicode(script_line))

        # 4) script and definition file both present
        env_with_script = self.environment.create({})
        assert self.environment.build(env_with_script.id)

        # 2), 3) and 4) must all resolve to the same environment
        assert env_from_given_path.id == env_from_found_path.id
        assert env_from_found_path.id == env_with_script.id

        # From here on no definition file exists any more
        os.remove(dockerfile_path)

        # 5) script present, no definition file
        env_without_definition = self.environment.create({})
        assert self.environment.build(env_without_definition.id)
        assert env_without_definition.id != env_from_given_path.id

        # teardown
        self.environment.delete(env_from_given_path.id)
        self.environment.delete(env_without_definition.id)

    def test_run(self):
        """Check running a command in a given environment and in an autogenerated one."""
        # 1) Test run simple command with simple Dockerfile
        # 2) Test run script, with autogenerated definition

        def make_run_options(command, volumes):
            # Fresh dictionary (and fresh ports list) for every run
            return {
                "command": command,
                "ports": ["8888:8888"],
                "name": None,
                "volumes": volumes,
                "detach": False,
                "stdin_open": False,
                "tty": False,
                "gpu": False,
                "api": False
            }

        self.project.init("test5", "test description")

        # 1) simple command, definition file given explicitly
        dockerfile_path = os.path.join(self.environment.home, "Dockerfile")
        with open(dockerfile_path, "w") as dockerfile:
            dockerfile.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        echo_options = make_run_options(["sh", "-c", "echo yo"], None)

        # Create environment in the project
        environment_obj = self.environment.create({
            "definition_filepath": dockerfile_path,
        })

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build, then run the environment in the project
        _ = self.environment.build(environment_obj.id)
        return_code, run_id, logs = self.environment.run(
            environment_obj.id, echo_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

        # 2) script run, definition autogenerated
        os.remove(dockerfile_path)

        # Create script to test
        script_path = os.path.join(self.environment.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "w") as script:
            for script_line in script_lines:
                script.write(to_unicode(script_line))

        # Create environment in the project
        environment_obj = self.environment.create({})
        self.environment.build(environment_obj.id)

        # The project home is mounted so that the script is visible in the run
        script_options = make_run_options(["python", "script.py"], {
            self.environment.home: {
                'bind': '/home/',
                'mode': 'rw'
            }
        })

        # Run environment in the project
        return_code, run_id, logs = self.environment.run(
            environment_obj.id, script_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

    def test_list(self):
        """Create two environments and check that list() returns exactly those."""
        self.project.init("test4", "test description")

        # (definition filename, definition content) for each environment
        definitions = (
            ("Dockerfile", "FROM datmo/xgboost:cpu"),
            ("Dockerfile2", "FROM datmo/scikit-opencv"),
        )

        created_environments = []
        for filename, content in definitions:
            # Write the definition file, then create the environment from it
            definition_path = os.path.join(self.environment.home, filename)
            with open(definition_path, "w") as definition_file:
                definition_file.write(to_unicode(str(content)))
            created_environments.append(
                self.environment.create({
                    "definition_filepath": definition_path,
                }))

        # List all environments and ensure both created ones are present
        listed = self.environment.list()

        assert len(listed) == 2
        assert all(
            environment_obj in listed
            for environment_obj in created_environments)

    def test_delete(self):
        """Delete an environment and check it can no longer be retrieved."""
        self.project.init("test5", "test description")

        # Write the environment definition
        dockerfile_path = os.path.join(self.environment.home, "Dockerfile")
        with open(dockerfile_path, "w") as dockerfile:
            dockerfile.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # Create the environment, then delete it again
        environment_obj = self.environment.create({
            "definition_filepath": dockerfile_path,
        })
        delete_result = self.environment.delete(environment_obj.id)

        # Retrieval through the DAL is expected to raise once the entity is gone
        try:
            self.environment.dal.environment.get_by_id(environment_obj.id)
        except EntityNotFound:
            lookup_raised = True
        else:
            lookup_raised = False

        assert delete_result == True
        assert lookup_raised == True

    def test_stop(self):
        """Run an environment, then check that stop() succeeds for that run."""
        self.project.init("test5", "test description")

        # Definition file in the environment home (the one handed to create())
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as definition_file:
            definition_file.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # The same definition is also written under the project home
        project_definition_path = os.path.join(self.project.home, "Dockerfile")
        with open(project_definition_path, "w") as project_definition_file:
            project_definition_file.write(
                to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = dict([
            ("command", ["sh", "-c", "echo yo"]),
            ("ports", ["8888:8888"]),
            ("name", None),
            ("volumes", None),
            ("detach", False),
            ("stdin_open", False),
            ("tty", False),
            ("gpu", False),
            ("api", False),
        ])

        # Create and build the environment in the project
        environment_obj = self.environment.create({
            "definition_filepath": definition_filepath,
        })
        log_filepath = os.path.join(self.project.home, "task.log")
        self.environment.build(environment_obj.id)

        # Only the run id is needed from the run result
        run_result = self.environment.run(environment_obj.id, run_options,
                                          log_filepath)
        _, run_id, _ = run_result

        # Stop the running environment
        stop_result = self.environment.stop(run_id)
        assert stop_result

        # teardown
        self.environment.delete(environment_obj.id)
Ejemplo n.º 3
0
class TaskController(BaseController):
    """TaskController inherits from BaseController and manages business logic associated with tasks
    within the project.

    Parameters
    ----------
    home : str
        home path of the project

    Attributes
    ----------
    environment : EnvironmentController
        used to create environment if new definition file
    snapshot : SnapshotController
        used to create snapshots before and after tasks

    Methods
    -------
    create(dictionary)
        creates a Task object with the permanent parameters
    _run_helper(environment_id, log_filepath, options)
        helper for run to start environment and run with the appropriate parameters
    run(self, id, dictionary=None)
        runs the task and tracks the run, logs, inputs and outputs
    list(session_id=None)
        lists all tasks within the project given filters
    delete(id)
        deletes the specified task from the project

    """
    def __init__(self, home):
        """Initialize the controller together with its sub-controllers.

        Parameters
        ----------
        home : str
            home path of the project

        Raises
        ------
        ProjectNotInitializedException
            if `self.is_initialized` is falsy after the base initialization
        """
        super(TaskController, self).__init__(home)
        self.environment = EnvironmentController(home)
        self.snapshot = SnapshotController(home)
        if self.is_initialized:
            return
        raise ProjectNotInitializedException(
            __("error", "controller.task.__init__"))

    def create(self, dictionary):
        """Create Task object

        Parameters
        ----------
        dictionary : dict
            command : str
                full command used

        Returns
        -------
        Task
            object entity for Task (datmo.core.entity.task.Task)

        Raises
        ------
        RequiredArgumentMissing
            if a required argument is absent from `dictionary` or is None
        """
        # Values that always come from the controller itself
        task_params = {
            "model_id": self.model.id,
            "session_id": self.current_session.id
        }

        # Every required argument must be present with a non-None value;
        # it is copied over to the task parameters as-is
        for arg_name in ("command", ):
            if arg_name not in dictionary or dictionary[arg_name] is None:
                raise RequiredArgumentMissing(
                    __("error", "controller.task.create.arg", arg_name))
            task_params[arg_name] = dictionary[arg_name]

        # Create Task
        new_task = Task(task_params)
        return self.dal.task.create(new_task)

    def _run_helper(self, environment_id, options, log_filepath):
        """Run environment with parameters

        Parameters
        ----------
        environment_id : str
            the environment id for definition
        options : dict
            can include the following values:

            command : list
            ports : list
                Here are some example ports used for common applications.
                   *  'jupyter notebook' - 8888
                   *  flask API - 5000
                   *  tensorboard - 6006
                An example input for the above would be ["8888:8888", "5000:5000", "6006:6006"]
                which maps the running host port (right) to that of the environment (left)
            name : str
            volumes : dict
            detach : bool
            stdin_open : bool
            tty : bool
            gpu : bool
        log_filepath : str
            absolute filepath to the log file

        Returns
        -------
        return_code : int
            system return code of the environment that was run
        run_id : str
            id of the environment run (different from environment id)
        logs : str
            output logs from the run
        """
        # Run container with options provided
        run_options = {
            "command": options.get('command', None),
            "ports": options.get('ports', None),
            "name": options.get('name', None),
            "volumes": options.get('volumes', None),
            "detach": options.get('detach', False),
            "stdin_open": options.get('stdin_open', False),
            "tty": options.get('tty', False),
            "gpu": options.get('gpu', False),
            "api": False
        }

        # Build image for environment
        self.environment.build(environment_id)

        # Run container with environment
        return_code, run_id, logs = \
            self.environment.run(environment_id, run_options, log_filepath)

        return return_code, run_id, logs

    def _parse_logs_for_results(self, logs):
        """Parse log string to extract results and return dictionary.

        Note
        ----
        If the same key is found multiple times in the logs, the last occurring
        one will be the one that is saved.

        Parameters
        ----------
        logs : str
            raw string value of output logs

        Returns
        -------
        dict
            dictionary to represent results from task
        """
        results = {}
        for line in logs.split("\n"):
            split_line = line.split(":")
            if len(split_line) == 2:
                results[split_line[0].strip()] = split_line[1].strip()
        return results

    def run(self, task_id, snapshot_dict=None, task_dict=None):
        """Run a task with parameters. If dictionary specified, create a new task with new run parameters.
        Snapshot objects are created before and after the task to keep track of the state. During the run,
        you can access task outputs using environment variable DATMO_TASK_DIR or `/task` which points to
        location of datmo_tasks/[task-id]. Create config.json, stats.json and any weights or any file such
        as graphs and visualizations within that directory for quick access

        Parameters
        ----------
        task_id : str
            id for the task you would like to run
        snapshot_dict : dict
            set of parameters to create a snapshot (see SnapshotController for details.
            default is None, which means dictionary with `visible` False will be added to
            hide auto-generated snapshot) NOTE: `visible` False will always be False regardless
            of whether the user provides another value for `visible`. The dictionary passed in
            is copied and never modified.
        task_dict : dict
            set of parameters to characterize the task run
            (default is None, which translate to {}, see datmo.core.entity.task.Task for more details on inputs)

        Returns
        -------
        Task
            the Task object which completed its run with updated parameters

        Raises
        ------
        TaskRunException
            If the task already has a status (it was run before or is running),
            or if the task directory cannot be created
        """
        # Work on a copy so the caller's dictionary is left untouched, and
        # force visible=False regardless of what was provided
        snapshot_dict = dict(snapshot_dict) if snapshot_dict else {}
        snapshot_dict['visible'] = False

        if not task_dict:
            task_dict = {}

        # Obtain Task to run
        task_obj = self.dal.task.get_by_id(task_id)

        # A task may only be run while it has no status yet
        if task_obj.status is not None:
            raise TaskRunException(
                __("error", "cli.task.run.already_running", task_obj.id))
        # NOTE: this only marks the in-memory object; the update calls below do
        # not include `status` until the final SUCCESS / FAILED value
        task_obj.status = 'RUNNING'

        # Create Task directory for user during run
        task_dirpath = os.path.join("datmo_tasks", task_obj.id)
        try:
            self.file_driver.create(task_dirpath, directory=True)
        except Exception:
            # Any failure of the file driver is reported as a task run error;
            # KeyboardInterrupt / SystemExit are deliberately not intercepted
            raise TaskRunException(
                __("error", "controller.task.run", task_dirpath))

        # Create the before snapshot prior to execution
        before_snapshot_dict = snapshot_dict.copy()
        before_snapshot_dict[
            'message'] = "autogenerated snapshot created before task %s is run" % task_obj.id
        before_snapshot_obj = self.snapshot.create(before_snapshot_dict)

        # Update the task with pre-execution parameters; values given in
        # task_dict take precedence over the ones derived here
        task_obj = self.dal.task.update({
            "id":
            task_obj.id,
            "before_snapshot_id":
            task_dict.get('before_snapshot_id', before_snapshot_obj.id),
            "ports":
            task_dict.get('ports', task_obj.ports),
            "gpu":
            task_dict.get('gpu', task_obj.gpu),
            "interactive":
            task_dict.get('interactive', task_obj.interactive),
            "task_dirpath":
            task_dict.get('task_dirpath', task_dirpath),
            "log_filepath":
            task_dict.get('log_filepath', os.path.join(task_dirpath,
                                                       "task.log")),
            "start_time":
            task_dict.get('start_time', datetime.utcnow())
        })

        # Copy over files from the before_snapshot file collection to task dir
        file_collection_obj =  \
            self.dal.file_collection.get_by_id(before_snapshot_obj.file_collection_id)
        absolute_task_dir_path = os.path.join(self.home, task_obj.task_dirpath)
        self.file_driver.copytree(
            os.path.join(self.home, file_collection_obj.path),
            absolute_task_dir_path)

        # Set the parameters set in the task
        environment_run_options = {
            "command": task_obj.command,
            "ports": [] if task_obj.ports is None else task_obj.ports,
            "gpu": task_obj.gpu,
            "name": "datmo-task-" + task_obj.id,
            "volumes": {
                # the task directory is mounted at /task/ and the project
                # home at /home/, both read-write
                absolute_task_dir_path: {
                    'bind': '/task/',
                    'mode': 'rw'
                },
                self.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "detach": task_obj.interactive,
            "stdin_open": task_obj.interactive,
            "tty": False,
            "api": not task_obj.interactive
        }

        # Run environment via the helper function
        return_code, run_id, logs =  \
            self._run_helper(before_snapshot_obj.environment_id,
                             environment_run_options,
                             os.path.join(self.home, task_obj.log_filepath))

        # Create the after snapshot after execution is completed with new filepaths
        after_snapshot_dict = snapshot_dict.copy()
        after_snapshot_dict[
            'message'] = "autogenerated snapshot created after task %s is run" % task_obj.id

        # Add in absolute filepaths from running task directory (only entries
        # that are a regular file or a directory)
        absolute_filepaths = []
        for item in os.listdir(absolute_task_dir_path):
            path = os.path.join(absolute_task_dir_path, item)
            if os.path.isfile(path) or os.path.isdir(path):
                absolute_filepaths.append(path)
        after_snapshot_dict.update({
            "filepaths":
            absolute_filepaths,
            "environment_id":
            before_snapshot_obj.environment_id,
        })
        after_snapshot_obj = self.snapshot.create(after_snapshot_dict)

        # Update the task with post-execution parameters
        end_time = datetime.utcnow()
        duration = (end_time - task_obj.start_time).total_seconds()
        return self.dal.task.update({
            "id":
            task_obj.id,
            "after_snapshot_id":
            after_snapshot_obj.id,
            "run_id":
            run_id,
            "logs":
            logs,
            "results":
            self._parse_logs_for_results(logs),
            # a zero return code is the only outcome recorded as SUCCESS
            "status":
            "SUCCESS" if return_code == 0 else "FAILED",
            "end_time":
            end_time,
            "duration":
            duration
        })

    def list(self, session_id=None):
        query = {}
        if session_id:
            query['session_id'] = session_id
        return self.dal.task.query(query)

    def get_files(self, task_id, mode="r"):
        """Get list of file objects for task id. It will look in the following areas in the following order

        1) look in the after snapshot for file collection
        2) look in the running task file collection
        3) look in the before snapshot for file collection

        Parameters
        ----------
        task_id : str
            id for the task you would like to get file objects for
        mode : str
            file open mode
            (default is "r" to open file for read)

        Returns
        -------
        list
            list of python file objects

        Raises
        ------
        PathDoesNotExist
            no file objects exist for the task
        """
        task_obj = self.dal.task.get_by_id(task_id)
        if task_obj.after_snapshot_id:
            # perform number 1) and return file list
            after_snapshot_obj = \
                self.dal.snapshot.get_by_id(task_obj.after_snapshot_id)
            file_collection_obj = \
                self.dal.file_collection.get_by_id(after_snapshot_obj.file_collection_id)
            return self.file_driver.\
                get_collection_files(file_collection_obj.filehash, mode=mode)
        elif task_obj.task_dirpath:
            # perform number 2) and return file list
            return self.file_driver.get(task_obj.task_dirpath,
                                        mode=mode,
                                        directory=True)
        elif task_obj.before_snapshot_id:
            # perform number 3) and return file list
            before_snapshot_obj = \
                self.dal.snapshot.get_by_id(task_obj.before_snapshot_id)
            file_collection_obj = \
                self.dal.file_collection.get_by_id(before_snapshot_obj.file_collection_id)
            return self.file_driver. \
                get_collection_files(file_collection_obj.filehash, mode=mode)
        else:
            # Error because the task does not have any files associated with it
            raise PathDoesNotExist()

    def delete(self, task_id):
        if not task_id:
            raise RequiredArgumentMissing(
                __("error", "controller.task.delete.arg", "id"))
        return self.dal.task.delete(task_id)

    def stop(self, task_id):
        """Stop and remove run for the task

        Parameters
        ----------
        task_id : str
            id for the task you would like to stop

        Returns
        -------
        return_code : bool
            system return code of the stop
        """
        if not task_id:
            raise RequiredArgumentMissing(
                __("error", "controller.task.stop.arg", "id"))
        task_obj = self.dal.task.get_by_id(task_id)
        run_id = task_obj.run_id
        return_code = self.environment.stop(run_id)
        return return_code
Ejemplo n.º 4
0
class TestEnvironmentController():
    def setup_method(self):
        """Point the datmo home at a fresh temp directory and create the
        project controller used by the tests."""
        scratch_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.temp_dir = scratch_dir
        Config().set_home(scratch_dir)
        self.project_controller = ProjectController()

    def teardown_method(self):
        """No per-test cleanup is performed."""
        return None

    def __setup(self):
        """Initialize the project, create the environment controller and write
        the fixture files the tests rely on.

        Files written: `test.txt` in the temp dir, plus `test` and
        `Dockerfile` in the file driver's environment directory.
        """
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()

        environment_dir = \
            self.environment_controller.file_driver.environment_directory
        self.random_filepath = os.path.join(environment_dir, "test")
        self.definition_filepath = os.path.join(environment_dir, "Dockerfile")

        # (path, content) for every fixture file, written in this order
        fixtures = (
            (os.path.join(self.temp_dir, "test.txt"), "hello"),
            (self.random_filepath, "cool"),
            (self.definition_filepath, "FROM python:3.5-alpine"),
        )
        for fixture_path, content in fixtures:
            with open(fixture_path, "wb") as fixture_file:
                fixture_file.write(to_bytes(content))

    def test_init_fail_project_not_init(self):
        """Constructing the controller before project init must raise."""
        Config().set_home(self.temp_dir)
        try:
            EnvironmentController()
        except ProjectNotInitialized:
            raised = True
        else:
            raised = False
        assert raised

    def test_get_supported_environments(self):
        """The controller must report a non-empty set of supported environments."""
        self.__setup()
        supported = self.environment_controller.get_supported_environments()
        assert supported

    def test_setup(self):
        """Exercise `EnvironmentController.setup`.

        Covers, in order: a first successful setup (no files present), a
        repeated successful setup, a failure when no environment name is
        given, and a failure when the definition file has been changed
        after setup.
        """
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()

        options = {"name": "xgboost:cpu"}

        def check_successful_setup(result):
            # Shared assertions for a successful setup; returns the path of
            # the definition file that setup is expected to have written
            definition_filepath = os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile")
            assert isinstance(result, Environment)
            assert result.name == options['name']
            assert result.description == "supported base environment created by datmo"
            assert os.path.isfile(definition_filepath)
            # Read through a context manager so the handle is always closed
            with open(definition_filepath, "r") as definition_file:
                assert "FROM datmo/xgboost:cpu" in definition_file.read()
            return definition_filepath

        # Test success setup once (no files present)
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = check_successful_setup(result)

        # Test success setup again (files present, but staged)
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = check_successful_setup(result)

        # Test failure in downstream function (e.g. bad inputs, no name given)
        failed = False
        try:
            self.environment_controller.setup(options={})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Change environment file
        with open(output_definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Test failure setup (unstaged changes)
        failed = False
        try:
            self.environment_controller.setup(options=options)
        except UnstagedChanges:
            failed = True
        assert failed

    def test_create(self):
        # 0) Test SUCCESS create when definition path exists in project environment directory (no input, no root) -- with hardware file
        # 1) Test SUCCESS create when definition path exists in project environment directory (no input, no root)
        # 5) Test SUCCESS when definition path exists in project environment directory and passed from input dict (takes input)
        # 2) Test SUCCESS create when definition path exists in root project folder (no input, no project environment dir)
        # 3) Test SUCCESS create when definition path is passed into input dict (takes input, no project environment dir)
        # 4) Test SUCCESS create when definition path is passed into input dict along with expected filename to be saved
        # 6) Test FAIL when passing same filepath with same filename into input dict

        self.__setup()

        input_dict_0 = {"name": "test", "description": "test description"}

        # 0) Test option 0 (cannot test hash because hardware is machine-dependent)
        environment_obj_0 = self.environment_controller.create(input_dict_0)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        assert environment_obj_0.unique_hash
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"

        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        output = open(os.path.join(file_collection_dir, "datmoDockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))
        output = open(os.path.join(file_collection_dir, "hardware_info"),
                      "r").read()
        print(repr(output))

        # 1) Test option 1
        environment_obj_0 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        output = open(os.path.join(file_collection_dir, "datmoDockerfile"),
                      "r").read()
        print(repr(output))
        assert environment_obj_0.unique_hash == "c309ae4f58163693a91816988d9dc88b"
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"
        # Files ["test", "Dockerfile", "datmoDockerfile"]

        # 5) Test option 5
        input_dict_1 = {
            "name": "test",
            "description": "test description",
            "paths": [self.definition_filepath],
        }

        environment_obj = self.environment_controller.create(
            input_dict_1, save_hardware_file=False)
        assert environment_obj
        assert isinstance(environment_obj, Environment)
        assert environment_obj.id
        assert environment_obj.driver_type == "docker"
        assert environment_obj.file_collection_id
        assert environment_obj.definition_filename
        assert environment_obj.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        output = open(os.path.join(file_collection_dir, "datmoDockerfile"),
                      "r").read()
        print(repr(output))
        assert environment_obj.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"
        assert environment_obj.name == "test"
        assert environment_obj.description == "test description"
        # Files ["Dockerfile", "datmoDockerfile"]

        # remove the project environment directory
        shutil.rmtree(
            self.environment_controller.file_driver.environment_directory)

        # Create environment definition in root directory
        home_definition_filepath = os.path.join(
            self.environment_controller.home, "Dockerfile")
        with open(home_definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # 2) Test option 2
        environment_obj_1 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)

        assert environment_obj_1
        assert isinstance(environment_obj_1, Environment)
        assert environment_obj_1.id
        assert environment_obj_1.driver_type == "docker"
        assert environment_obj_1.file_collection_id
        assert environment_obj_1.definition_filename
        assert environment_obj_1.hardware_info
        assert environment_obj_1.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_1.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_1.name == "test"
        assert environment_obj_1.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj_1.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"

        # 3) Test option 3
        input_dict_2 = {
            "name": "test",
            "description": "test description",
            "paths": [home_definition_filepath],
        }

        # Create environment in the project
        environment_obj_2 = self.environment_controller.create(
            input_dict_2, save_hardware_file=False)

        assert environment_obj_2
        assert isinstance(environment_obj_2, Environment)
        assert environment_obj_2.id
        assert environment_obj_2.driver_type == "docker"
        assert environment_obj_2.file_collection_id
        assert environment_obj_2.definition_filename
        assert environment_obj_2.hardware_info
        assert environment_obj_2.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_2.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_2.name == "test"
        assert environment_obj_2.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj_2.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"

        # 4) Test option 4
        input_dict_3 = {
            "paths": [home_definition_filepath + ">Dockerfile"],
        }

        # Create environment in the project
        environment_obj_3 = self.environment_controller.create(
            input_dict_3, save_hardware_file=False)

        assert environment_obj_3
        assert isinstance(environment_obj_3, Environment)
        assert environment_obj_3.id
        assert environment_obj_3.driver_type == "docker"
        assert environment_obj_3.file_collection_id
        assert environment_obj_3.definition_filename
        assert environment_obj_3.hardware_info
        assert environment_obj_3.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_3.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_3.name == "test"
        assert environment_obj_3.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj_3.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"

        # 6) Test option 6
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")

        input_dict = {
            "paths": [
                definition_filepath + ">Dockerfile",
                definition_filepath + ">Dockerfile"
            ],
        }

        # Create environment in the project
        failed = False
        try:
            _ = self.environment_controller.create(input_dict,
                                                   save_hardware_file=False)
        except FileAlreadyExistsError:
            failed = True

        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_build(self):
        """Exercise ``EnvironmentController.build`` in five scenarios.

        1) build with an id that does not exist raises
           ``EnvironmentDoesNotExist``
        2) build an environment created from a Dockerfile in the project
           home that is passed explicitly via ``paths``
        3) build an environment created with an empty input dictionary
        4) same as 3), after a python script was added to the project home
        5) build an environment created with an empty input dictionary after
           the Dockerfile in the project home was removed and a new one was
           written into the project environment directory

        Scenarios 2) to 4) are asserted to resolve to the same environment id.
        """
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 1) Test option 1
        failed = False
        try:
            _ = self.environment_controller.build("does_not_exist")
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition in the project home; the uuid gives
        # the RUN line different content on every test run
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))
        input_dict = {
            "paths": [definition_filepath],
        }

        # 2) Test option 2
        # Create environment in the project
        environment_obj_1 = self.environment_controller.create(input_dict)
        result = self.environment_controller.build(environment_obj_1.id)
        assert result

        # 3) Test option 3
        # Create environment in the project
        environment_obj_2 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_2.id)
        assert result

        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import numpy\n"))
            f.write(to_bytes("import sklearn\n"))
            f.write(to_bytes("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_3.id)
        assert result

        # test 2), 3), and 4) will result in the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # Remove the Dockerfile from the project home so that only the one in
        # the environment directory is left for option 5
        os.remove(definition_filepath)

        # 5) Test option 5
        # Create environment definition in project environment directory
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))
        environment_obj_4 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_4.id)
        assert result

        # teardown: remove every distinct environment that was built above.
        # Options 2) to 4) share one id; option 5 is only deleted separately
        # when it resolved to a different environment, so nothing is deleted
        # twice.
        self.environment_controller.delete(environment_obj_1.id)
        if environment_obj_4.id != environment_obj_1.id:
            self.environment_controller.delete(environment_obj_4.id)

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_run(self):
        """Run ``sh -c "echo yo"`` in environments created three different ways.

        0) Dockerfile written into the project environment directory
        1) Dockerfile written into the project home and passed via ``paths``
        2) no Dockerfile, only a script in the project home; the project home
           is mounted into the container as a volume

        Every run must yield return code 0 together with a run id and logs.
        """
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller
        log_filepath = os.path.join(self.project_controller.home, "task.log")

        def write_dockerfile(path):
            # alpine base image followed by a RUN line carrying a fresh uuid
            marker = str(uuid.uuid1())
            with open(path, "wb") as dockerfile:
                dockerfile.write(to_bytes("FROM python:3.5-alpine" + "\n"))
                dockerfile.write(to_bytes(str("RUN echo " + marker)))

        def random_container_name():
            # 32 random alphanumeric characters
            alphabet = string.ascii_letters + string.digits
            return ''.join(random.choice(alphabet) for _ in range(32))

        def run_and_check(environment_id, options):
            # run the environment and verify the three returned values
            return_code, run_id, logs = controller.run(
                environment_id, options, log_filepath)
            assert return_code == 0
            assert run_id
            assert logs

        # 0) Dockerfile lives in the project environment directory
        env_dir_dockerfile = os.path.join(
            controller.file_driver.environment_directory, "Dockerfile")
        write_dockerfile(env_dir_dockerfile)
        options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_container_name(),
            "volumes": None,
            "detach": True,
            "stdin_open": False,
            "mem_limit": "4g",
            "tty": False,
            "api": False
        }
        environment_obj = controller.create({})
        _ = controller.build(environment_obj.id)
        run_and_check(environment_obj.id, options)

        # teardown of option 0, including the environment directory itself
        controller.delete(environment_obj.id)
        shutil.rmtree(controller.file_driver.environment_directory)

        # 1) Dockerfile lives in the project home and is passed explicitly
        home_dockerfile = os.path.join(controller.home, "Dockerfile")
        write_dockerfile(home_dockerfile)
        options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_container_name(),
            "volumes": None,
            "mem_limit": "4g",
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        environment_obj = controller.create({"paths": [home_dockerfile]})
        _ = controller.build(environment_obj.id)
        run_and_check(environment_obj.id, options)

        # teardown of option 1
        controller.delete(environment_obj.id)

        # 2) No Dockerfile at all, only a script in the project home
        os.remove(home_dockerfile)
        script_path = os.path.join(controller.home, "script.py")
        with open(script_path, "wb") as script:
            for source_line in ("import os\n", "import sys\n",
                                "print('hello')\n"):
                script.write(to_bytes(source_line))

        environment_obj = controller.create({})
        controller.build(environment_obj.id)

        options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_container_name(),
            "volumes": {
                controller.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        run_and_check(environment_obj.id, options)

        # teardown of option 2
        controller.delete(environment_obj.id)

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_interactive_run(self):
        """Check that long-lived runs are still going when a 10 second timeout hits.

        Two scenarios are exercised against an environment created from a
        ``datmo/xgboost:cpu`` based Dockerfile in the project home:

        1) an empty command with ``stdin_open`` and ``tty`` enabled
        2) the command ``jupyter notebook``

        In both scenarios the build and run calls are wrapped in a function
        decorated with ``timeout_decorator.timeout(10, use_signals=False)``.
        That function returns its second argument, which the caller passes as
        ``False``, so the final assertion only passes when the call raised
        ``TimeoutError`` and the handler set the flag to ``True``.
        """
        # 1) Test run interactive terminal in environment
        # 2) Test run jupyter notebook in environment
        # Create environment definition
        self.project_controller.init("test6", "test description")
        self.environment_controller = EnvironmentController()

        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        # uuid makes the RUN line differ on every test run
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM datmo/xgboost:cpu" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        # 1) Test option 1
        # NOTE: the second parameter shadows the function's own name inside
        # the body; it is only used as the value returned on completion.
        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run):
            run_options = {
                "command": [],
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": True,
                "tty": True,
                "api": False
            }

            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")

            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)

            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)

            # Reached only when build and run finished before the timeout
            return timed_run

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            # Passing False means a call that completes leaves the flag False
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            # Hitting the timeout is the outcome this test treats as success
            timed_run_result = True

        assert timed_run_result

        # teardown
        self.environment_controller.delete(environment_obj.id)

        # 2) Test option 2
        environment_obj = self.environment_controller.create(input_dict)

        # Same wrapper as in option 1, but running a jupyter notebook without
        # stdin_open / tty
        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run):
            run_options = {
                "command": ["jupyter", "notebook"],
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": False,
                "tty": False,
                "api": False
            }

            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")

            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)

            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)

            # Reached only when build and run finished before the timeout
            return timed_run

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True

        assert timed_run_result

        # Stop the running environment
        # self.environment_controller.stop(container_name)

        # teardown
        self.environment_controller.delete(environment_obj.id)

    def test_list(self):
        """Create two environments and check that ``list`` returns exactly those."""
        self.project_controller.init("test4", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # (file name in the project home, file contents, suffix appended to
        # the path handed to create)
        definitions = (
            ("Dockerfile", "FROM python:3.5-alpine", ""),
            ("Dockerfile2", "FROM python:3.4-alpine", ">Dockerfile"),
        )

        created = []
        for filename, contents, path_suffix in definitions:
            definition_path = os.path.join(controller.home, filename)
            with open(definition_path, "wb") as definition_file:
                definition_file.write(to_bytes(contents))
            # Create environment in the project
            created.append(
                controller.create({
                    "paths": [definition_path + path_suffix],
                }))

        # List all environments and ensure they exist
        listed = controller.list()

        assert len(listed) == 2
        for environment_obj in created:
            assert environment_obj in listed

    def test_update(self):
        """Update an environment's name and description; unknown ids must raise."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # Create environment definition and the environment itself
        dockerfile_path = os.path.join(controller.home, "Dockerfile")
        with open(dockerfile_path, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.5-alpine"))
        environment_obj = controller.create({"paths": [dockerfile_path]})

        changes = {"name": "test name", "description": "test description"}

        # Test success update
        updated = controller.update(environment_obj.id, **changes)
        assert updated
        assert isinstance(updated, Environment)
        assert updated.name == changes["name"]
        assert updated.description == changes["description"]

        # Test failed update
        try:
            controller.update("random_id", **changes)
        except EnvironmentDoesNotExist:
            raised = True
        else:
            raised = False
        assert raised

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_delete(self):
        """Delete an environment and confirm it can no longer be fetched by id."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # Create environment definition and the environment itself
        dockerfile_path = os.path.join(controller.home, "Dockerfile")
        with open(dockerfile_path, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.5-alpine"))
        environment_obj = controller.create({"paths": [dockerfile_path]})

        # Delete environment in the project
        deleted = controller.delete(environment_obj.id)

        # Check if environment retrieval throws error
        try:
            controller.dal.environment.get_by_id(environment_obj.id)
        except EntityNotFound:
            lookup_failed = True
        else:
            lookup_failed = False

        # explicit comparison kept: the result must equal True, not merely
        # be truthy
        assert deleted == True
        assert lookup_failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_failure(self):
        """Check the argument validation errors raised by ``stop``.

        1) no arguments raises ``RequiredArgumentMissing``
        2) ``run_id`` together with ``match_string`` raises
           ``TooManyArgumentsFound``
        """
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        def stop_raises(expected_error, **stop_kwargs):
            # True only when stop raised the expected error type
            try:
                controller.stop(**stop_kwargs)
            except expected_error:
                return True
            return False

        # 1) Test option 1
        assert stop_raises(RequiredArgumentMissing)

        # 2) Test option 2
        assert stop_raises(
            TooManyArgumentsFound, run_id="hello", match_string="there")

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_success(self):
        """Stop running environments by run id, by match string and with ``all``.

        1) stop using the ``run_id`` returned by ``run``
        2) stop using a ``match_string`` made of the task name prefix
        3) start two runs with different names and stop with ``all=True``
        """
        # TODO: test more run options
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # Create environment definition
        definition_filepath = os.path.join(controller.home, "Dockerfile")
        with open(definition_filepath, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.5-alpine"))

        task_prefix = "datmo-task-" + controller.model.id

        def options_named(label):
            # run options that differ only in the container name suffix
            return {
                "command": ["sh", "-c", "echo yo"],
                "ports": ["8888:8888"],
                "name": task_prefix + "-" + label,
                "volumes": None,
                "mem_limit": "4g",
                "detach": False,
                "stdin_open": False,
                "tty": False,
                "api": False
            }

        run_options = options_named("test")

        # Create environment definition (written under the project
        # controller's home, this time with a random RUN line)
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        marker = str(uuid.uuid1())
        with open(env_def_path, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            dockerfile.write(to_bytes(str("RUN echo " + marker)))

        # Create environment in the project
        environment_obj = controller.create({"paths": [definition_filepath]})

        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = controller.build(environment_obj.id)

        def start(options):
            return controller.run(environment_obj.id, options, log_filepath)

        # 1) Test option 1
        _, run_id, _ = start(run_options)
        stopped = controller.stop(run_id=run_id)
        assert stopped

        # 2) Test option 2
        _, _, _ = start(run_options)
        stopped = controller.stop(match_string=task_prefix)
        assert stopped

        # 3) Test option 3
        _, _, _ = start(run_options)
        run_options_2 = options_named("test2")
        _, _, _ = start(run_options_2)
        stopped = controller.stop(all=True)
        assert stopped

        # teardown
        controller.delete(environment_obj.id)

    def test_exists_env(self):
        """Check ``exists`` by environment id and by unique hash.

        Before the setup helper has run, ``create`` is expected to fail.
        After setup, an environment is created and ``exists`` must be truthy
        for its id and for its unique hash, and falsy for an unknown id.
        """
        # Test failure, not initialized
        # Catch Exception instead of using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed and counted as the
        # expected failure.
        failed = False
        try:
            _ = self.environment_controller.create({})
        except Exception:
            failed = True
        assert failed

        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})

        # Check by environment id
        result = self.environment_controller.exists(
            environment_id=environment_obj.id)
        assert result

        # Check by unique hash
        result = self.environment_controller.exists(
            environment_unique_hash=environment_obj.unique_hash)
        assert result

        # Test with wrong environment id
        result = self.environment_controller.exists(
            environment_id='test_wrong_env_id')
        assert not result

    def test_calculate_project_environment_hash(self):
        """Check the project environment hash against a fixed expected value.

        The hash computed without the hardware file must equal the expected
        value before any environment exists, after creating an environment
        with an empty input dictionary, and after creating one from the
        definition and random files given explicitly as ``paths``.
        """
        # Setup
        self.__setup()
        controller = self.environment_controller
        expected_hash = "c309ae4f58163693a91816988d9dc88b"

        # Test hashing the default (with hardware info)
        assert controller._calculate_project_environment_hash()

        # Test hashing the default Dockerfile
        without_hardware = controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert without_hardware == expected_hash

        # Test if hash is the same as that of create
        environment_obj = controller.create({}, save_hardware_file=False)
        after_create = controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert after_create == expected_hash
        assert after_create == environment_obj.unique_hash

        # Test if the hash is the same if the same file is passed in as an input
        explicit_paths = {
            "paths": [self.definition_filepath, self.random_filepath]
        }
        environment_obj_1 = controller.create(
            explicit_paths, save_hardware_file=False)
        after_explicit = controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert after_explicit == expected_hash
        assert after_explicit == environment_obj_1.unique_hash

    def test_has_unstaged_changes(self):
        """``_has_unstaged_changes`` turns truthy once the Dockerfile is rewritten."""
        # Setup
        self.__setup()
        controller = self.environment_controller
        _ = controller.create({})

        # Check for no unstaged changes
        assert not controller._has_unstaged_changes()

        # Make a change to the file (update python version)
        dockerfile_path = os.path.join(
            controller.file_driver.environment_directory, "Dockerfile")
        with open(dockerfile_path, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.6-alpine"))

        # Check again, should have unstaged changes
        assert controller._has_unstaged_changes()

    def test_check_unstaged_changes(self):
        """Walk ``check_unstaged_changes`` through five directory states.

        1) right after creating an environment: falsy result
        2) after adding a new file: raises ``UnstagedChanges``
        3) after removing that file again: falsy result
        4) after removing the file named "test": raises ``UnstagedChanges``
        5) after also removing the Dockerfile: falsy result
        """
        # Setup
        self.__setup()
        controller = self.environment_controller
        _ = controller.create({})

        def env_file(name):
            # path of a file inside the project environment directory
            return os.path.join(controller.file_driver.environment_directory,
                                name)

        def raises_unstaged():
            # True only when the check raised UnstagedChanges
            try:
                controller.check_unstaged_changes()
            except UnstagedChanges:
                return True
            return False

        # 1) After committing the changes
        # Check for no unstaged changes because already committed
        assert not controller.check_unstaged_changes()

        # Add a new file
        with open(env_file("test2"), "wb") as extra_file:
            extra_file.write(to_bytes("cool"))

        # 2) Not committing the changes, should error and raise UnstagedChanges
        assert raises_unstaged()

        # Remove new file
        os.remove(env_file("test2"))

        # 3) Files are the same as before but no new commit, should have no unstaged changes
        assert not controller.check_unstaged_changes()

        # 4) Remove another file, now it is different and should have unstaged changes
        os.remove(env_file("test"))
        assert raises_unstaged()

        # 5) Remove the rest of the files, now it is empty and should return as already staged
        os.remove(env_file("Dockerfile"))
        assert not controller.check_unstaged_changes()

    def test_checkout(self):
        """Check ``checkout`` with a clean and with a modified environment directory.

        A checkout of a freshly created environment must succeed and leave
        the "test" and "Dockerfile" files (but no "datmoDockerfile" or
        "hardware_info") in the environment directory. After the definition
        file is modified, checkout must raise ``UnstagedChanges``. Once a new
        environment has been created from the modified files, checking out
        the first environment must succeed again.
        """
        # Setup and create all environment files
        self.__setup()
        controller = self.environment_controller

        def env_file(name):
            # path of a file inside the project environment directory
            return os.path.join(controller.file_driver.environment_directory,
                                name)

        def checkout_first_environment():
            # checkout must succeed and the project hash must match afterwards
            assert controller.checkout(environment_obj.id)
            project_hash = controller._calculate_project_environment_hash()
            assert environment_obj.unique_hash == project_hash
            return project_hash

        def assert_expected_files():
            # Check the filenames as well because the hash does not take this into account
            for present in ("test", "Dockerfile"):
                assert os.path.isfile(env_file(present))
            for absent in ("datmoDockerfile", "hardware_info"):
                assert not os.path.isfile(env_file(absent))

        # Create environment to checkout to with defaults
        environment_obj = controller.create({})

        # Checkout succeeds when there are no unstaged changes
        checkout_first_environment()
        assert_expected_files()

        # Change file contents to make it unstaged
        with open(self.definition_filepath, "wb") as definition_file:
            definition_file.write(to_bytes("new content"))

        # Checkout failure with unstaged changes
        try:
            _ = controller.checkout(environment_obj.id)
        except UnstagedChanges:
            raised = True
        else:
            raised = False
        assert raised

        # Create new environment to checkout to with defaults (no hardware)
        environment_obj_1 = controller.create({}, save_hardware_file=False)

        # Checkout succeeds when there are no unstaged changes
        current_hash = checkout_first_environment()
        assert environment_obj_1.unique_hash != current_hash
        assert_expected_files()
Ejemplo n.º 5
0
class TestEnvironmentController():
    def setup_method(self):
        """Create a scratch directory and the controllers used by each test.

        Sets ``self.temp_dir`` to a new temporary directory and builds
        ``self.project`` (ProjectController) and ``self.environment``
        (EnvironmentController) on top of it.
        """
        # Docker needs a mountable tmp directory; on Windows the tempfile
        # default is kept by resetting the override to None.
        if platform.system() == "Windows":
            tempfile.tempdir = None
        else:
            tempfile.tempdir = "/tmp"

        # TEST_DATMO_DIR, when set, overrides where scratch directories go
        base_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=base_dir)

        self.project = ProjectController(self.temp_dir)
        self.environment = EnvironmentController(self.temp_dir)

    def teardown_method(self):
        """No-op hook: performs no cleanup after a test method."""
        return None

    def test_create(self):
        """Exercise ``EnvironmentController.create`` across its input scenarios.

        Scenarios covered, in order:
          0) an unsupported language is given
          1) no script file and no definition file exist
          2) no script file exists, a Dockerfile exists in the project
          3) the existing Dockerfile is passed explicitly
          4) a script file exists and the Dockerfile exists
          5) a script file exists but the Dockerfile has been removed
          6) a Dockerfile with NEW contents is passed explicitly
        """

        def assert_entity_fields(environment_obj):
            # Basic fields every created environment is expected to carry
            assert environment_obj
            assert environment_obj.id
            assert environment_obj.driver_type == "docker"
            assert environment_obj.file_collection_id
            assert environment_obj.definition_filename
            assert environment_obj.hardware_info

        def assert_stored(environment_obj, expected_filenames):
            # Resolve the file collection backing the environment, then check
            # the entity, its hash and the files present in the collection.
            file_collection_obj = self.environment.dal.file_collection. \
                get_by_id(environment_obj.file_collection_id)
            file_collection_dir = self.environment.file_driver. \
                get_collection_path(file_collection_obj.filehash)

            assert_entity_fields(environment_obj)
            assert environment_obj.unique_hash == file_collection_obj.filehash
            for filename in expected_filenames:
                assert os.path.isfile(
                    os.path.join(file_collection_dir, filename))

        default_files = ("Dockerfile", "datmoDockerfile", "hardware_info")

        self.project.init("test3", "test description")

        # 0) Test option 0
        # The flag must exist before the try block; otherwise a create() that
        # does not raise would surface as UnboundLocalError instead of a
        # plain assertion failure.
        failed = False
        try:
            self.environment.create({"language": "java"})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # 1) Test option 1
        # Creates environment with python3 based docker image
        environment_obj_0 = self.environment.create({})
        assert_entity_fields(environment_obj_0)

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) Test option 2
        environment_obj_1 = self.environment.create({})
        assert_stored(environment_obj_1, default_files)

        # 3) Test option 3
        input_dict = {
            "definition_filepath": definition_filepath,
        }
        environment_obj_2 = self.environment.create(input_dict)
        assert_stored(environment_obj_2, default_files)

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})
        assert_stored(environment_obj_3, default_files)

        # Remove definition filepath
        os.remove(definition_filepath)

        # Options 2), 3) and 4) must all resolve to the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # 5) Test option 5
        # Without a Dockerfile a requirements file is expected in the
        # collection alongside the usual files.
        environment_obj_4 = self.environment.create({})
        assert_stored(environment_obj_4,
                      ("datmorequirements.txt", ) + default_files)

        assert environment_obj_1.id != environment_obj_4.id

        # 6) Test option 6
        # Create environment definition with different contents
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM cloudgear/ubuntu:14.04")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create a new environment obj
        environment_obj_5 = self.environment.create(input_dict)
        assert_stored(environment_obj_5, default_files)

        assert environment_obj_5.id != environment_obj_1.id
        assert environment_obj_5.id != environment_obj_4.id

    def test_build(self):
        """Exercise ``EnvironmentController.build``.

        Scenarios covered, in order:
          1) building an id that does not exist
          2) building an environment created from an explicit Dockerfile
          3) building with the Dockerfile present and no script file
          4) building with both a script file and the Dockerfile present
          5) building with a script file but the Dockerfile removed
        """
        self.project.init("test5", "test description")

        # 1) An unknown environment id must raise EntityNotFound
        try:
            _ = self.environment.build("does_not_exist")
        except EntityNotFound:
            failed = True
        else:
            failed = False
        assert failed

        # Write the environment definition used by scenarios 2) - 4)
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))
        input_dict = {"definition_filepath": definition_filepath}

        # 2) Definition path exists and is given explicitly
        env_explicit = self.environment.create(input_dict)
        built = self.environment.build(env_explicit.id)
        assert built

        # 3) Definition path exists, nothing passed in
        env_implicit = self.environment.create({})
        built = self.environment.build(env_implicit.id)
        assert built

        # Add a script to the project
        test_filepath = os.path.join(self.environment.home, "script.py")
        script_lines = (
            "import numpy\n",
            "import sklearn\n",
            "print('hello')\n",
        )
        with open(test_filepath, "w") as f:
            for script_line in script_lines:
                f.write(to_unicode(script_line))

        # 4) Script file and definition path both exist
        env_with_script = self.environment.create({})
        built = self.environment.build(env_with_script.id)
        assert built

        # Scenarios 2), 3) and 4) must resolve to the same environment
        assert env_explicit.id == env_implicit.id
        assert env_implicit.id == env_with_script.id

        # Drop the Dockerfile so that only the script remains
        os.remove(definition_filepath)

        # 5) Script file exists but no definition path
        env_no_definition = self.environment.create({})
        built = self.environment.build(env_no_definition.id)
        assert built
        assert env_no_definition.id != env_explicit.id

        # teardown
        self.environment.delete(env_explicit.id)
        self.environment.delete(env_no_definition.id)

    def test_run(self):
        """Exercise ``EnvironmentController.run``.

        Scenarios covered, in order:
          1) a simple shell command with an explicit Dockerfile (detached)
          2) a python script with the Dockerfile removed, mounting the
             project home into the container (not detached)
        """
        alphabet = string.ascii_letters + string.digits

        def new_container_name():
            # 32 random alphanumeric characters
            return ''.join([random.choice(alphabet) for _ in range(32)])

        self.project.init("test5", "test description")

        # 1) Simple command with a simple Dockerfile
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": new_container_name(),
            "volumes": None,
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Create environment in the project
        environment_obj = self.environment.create({
            "definition_filepath": definition_filepath,
        })

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build, then run the environment in the project
        _ = self.environment.build(environment_obj.id)
        return_code, run_id, logs = self.environment.run(
            environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

        # 2) Run a script with no Dockerfile in the project
        os.remove(definition_filepath)

        test_filepath = os.path.join(self.environment.home, "script.py")
        script_lines = (
            "import numpy\n",
            "import sklearn\n",
            "print('hello')\n",
        )
        with open(test_filepath, "w") as f:
            for script_line in script_lines:
                f.write(to_unicode(script_line))

        # Create and build environment in the project
        environment_obj = self.environment.create({})
        self.environment.build(environment_obj.id)

        # Mount the project home so that script.py is visible in the container
        mounted_home = {self.environment.home: {'bind': '/home/', 'mode': 'rw'}}
        run_options = {
            "command": ["python", "script.py"],
            "ports": ["8888:8888"],
            "name": new_container_name(),
            "volumes": mounted_home,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Run environment in the project
        return_code, run_id, logs = self.environment.run(
            environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

    def test_interactive_run(self):
        """Check that interactive-style runs outlive a 10 second timeout.

        Scenarios covered, in order:
          1) an interactive terminal (empty command, stdin and tty enabled)
          2) a jupyter notebook command (stdin and tty disabled)

        The timed helper hands back the flag it was given (False), so each
        scenario only passes when the timeout fires and the flag is flipped
        to True in the exception handler.
        """
        self.project.init("test6", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {"definition_filepath": definition_filepath}

        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(environment_id, container_name, command, interactive,
                      outcome):
            run_options = {
                "command": command,
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "detach": True,
                "stdin_open": interactive,
                "tty": interactive,
                "api": False
            }

            log_filepath = os.path.join(self.project.home, "task.log")

            # Build, then run the environment in the project
            _ = self.environment.build(environment_id)
            self.environment.run(environment_id, run_options, log_filepath)

            return outcome

        # (command, interactive) for scenario 1) and scenario 2)
        scenarios = (
            ([], True),
            (["jupyter", "notebook"], False),
        )
        for command, interactive in scenarios:
            # Create environment in the project
            environment_obj = self.environment.create(input_dict)

            container_name = str(uuid.uuid1())
            try:
                timed_run_result = timed_run(environment_obj.id,
                                             container_name, command,
                                             interactive, False)
            except timeout_decorator.timeout_decorator.TimeoutError:
                timed_run_result = True

            assert timed_run_result

            # NOTE: the run started above is not explicitly stopped here
            # teardown
            self.environment.delete(environment_obj.id)

    def test_list(self):
        """Create two distinct environments and check ``list`` returns both."""
        self.project.init("test4", "test description")

        # First environment: definition stored as "Dockerfile"
        definition_path_1 = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_path_1, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))
        environment_obj_1 = self.environment.create({
            "definition_filepath": definition_path_1,
        })

        # Second environment: different file name and base image
        definition_path_2 = os.path.join(self.environment.home, "Dockerfile2")
        with open(definition_path_2, "w") as f:
            f.write(to_unicode(str("FROM datmo/scikit-opencv")))
        environment_obj_2 = self.environment.create({
            "definition_filepath": definition_path_2,
        })

        # List all environments and ensure exactly these two exist
        listed = self.environment.list()

        assert len(listed) == 2
        assert environment_obj_1 in listed
        assert environment_obj_2 in listed

    def test_delete(self):
        """Delete an environment and check it can no longer be retrieved."""
        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # Create, then delete the environment in the project
        environment_obj = self.environment.create({
            "definition_filepath": definition_filepath,
        })
        result = self.environment.delete(environment_obj.id)

        # Retrieval through the data access layer must now fail
        try:
            self.environment.dal.environment.get_by_id(environment_obj.id)
        except EntityNotFound:
            thrown = True
        else:
            thrown = False

        assert result == True
        assert thrown

    def test_stop_failure(self):
        """Check that ``stop`` rejects invalid argument combinations.

        Scenarios covered, in order:
          1) no arguments -> RequiredArgumentMissing
          2) both run_id and match_string -> TooManyArgumentsFound
        """
        # 1) Nothing to identify what should be stopped
        try:
            self.environment.stop()
        except RequiredArgumentMissing:
            failed = True
        else:
            failed = False
        assert failed

        # 2) Two selectors given at once
        try:
            self.environment.stop(run_id="hello", match_string="there")
        except TooManyArgumentsFound:
            failed = True
        else:
            failed = False
        assert failed

    def test_stop_success(self):
        """Exercise the successful paths of ``EnvironmentController.stop``.

        Scenarios covered, in order:
          1) stop by run_id
          2) stop by match_string
          3) stop everything with all=True after two runs
        """

        # TODO: test more run options
        def task_options(suffix):
            # Non-detached "echo" run whose container name carries the model id
            return {
                "command": ["sh", "-c", "echo yo"],
                "ports": ["8888:8888"],
                "name": "datmo-task-" + self.environment.model.id + "-" + suffix,
                "volumes": None,
                "detach": False,
                "stdin_open": False,
                "tty": False,
                "api": False
            }

        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = task_options("test")

        # Same definition written relative to the project home
        # NOTE(review): presumably the same file as above -- confirm that
        # project.home and environment.home coincide
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # Create environment in the project
        environment_obj = self.environment.create({
            "definition_filepath": definition_filepath,
        })

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build environment in the project
        _ = self.environment.build(environment_obj.id)

        # 1) Stop using the run id returned by run
        _, run_id, _ = self.environment.run(environment_obj.id, run_options,
                                            log_filepath)
        return_code = self.environment.stop(run_id=run_id)
        assert return_code

        # 2) Stop using a match string built from the model id
        self.environment.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment.stop(
            match_string="datmo-task-" + self.environment.model.id)
        assert return_code

        # 3) Stop everything after two runs under different names
        self.environment.run(environment_obj.id, run_options, log_filepath)
        run_options_2 = task_options("test2")
        self.environment.run(environment_obj.id, run_options_2, log_filepath)
        return_code = self.environment.stop(all=True)
        assert return_code

        # teardown
        self.environment.delete(environment_obj.id)