Ejemplo n.º 1
0
    def test_delete(self):
        """Create an environment, delete it, and check it can no longer be fetched."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # Write a one-line Dockerfile into the controller's home directory to
        # serve as the environment definition
        dockerfile_path = os.path.join(controller.home, "Dockerfile")
        with open(dockerfile_path, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.5-alpine"))

        # Create the environment in the project, then delete it again
        created = controller.create({"paths": [dockerfile_path]})
        result = controller.delete(created.id)

        # Fetching the deleted id is expected to raise EntityNotFound
        try:
            controller.dal.environment.get_by_id(created.id)
        except EntityNotFound:
            thrown = True
        else:
            thrown = False

        assert result == True
        assert thrown == True
Ejemplo n.º 2
0
 def teardown_method(self):
     """Delete every environment id collected in ``self.environment_ids``.

     Nothing happens when ``check_docker_inactive(test_datmo_dir)`` is truthy.
     Raises a bare ``Exception`` if any deletion reports failure.
     """
     if check_docker_inactive(test_datmo_dir):
         return
     self.__setup()
     self.environment_controller = EnvironmentController()
     # Go through a set so a repeated id is only deleted once
     for environment_id in set(self.environment_ids):
         deleted = self.environment_controller.delete(environment_id)
         if not deleted:
             raise Exception
Ejemplo n.º 3
0
 def delete(self, **kwargs):
     """Delete the environment whose id is given by the ``id`` keyword.

     Returns:
         True after echoing a success message when the controller reports
         the deletion succeeded; None otherwise.
     """
     self.environment_controller = EnvironmentController()
     environment_id = kwargs.get('id')
     deleted = self.environment_controller.delete(environment_id)
     if not deleted:
         return None
     self.cli_helper.echo(
         __("info", "cli.environment.delete.success", environment_id))
     return True
Ejemplo n.º 4
0
 def __init__(self, home):
     """Initialise the controller and its sub-controllers for ``home``.

     Raises:
         ProjectNotInitializedException: when ``self.is_initialized`` is
             falsy once the sub-controllers have been built.
     """
     super(TaskController, self).__init__(home)
     self.environment = EnvironmentController(home)
     self.snapshot = SnapshotController(home)
     if self.is_initialized:
         return
     raise ProjectNotInitializedException(
         __("error", "controller.task.__init__"))
Ejemplo n.º 5
0
 def __init__(self):
     """Initialise the snapshot controller and the controllers it relies on.

     Raises:
         ProjectNotInitialized: when ``self.is_initialized`` is falsy once
             the code, file-collection and environment controllers exist.
     """
     super(SnapshotController, self).__init__()
     self.code = CodeController()
     self.file_collection = FileCollectionController()
     self.environment = EnvironmentController()
     if self.is_initialized:
         return
     raise ProjectNotInitialized(
         __("error", "controller.snapshot.__init__"))
Ejemplo n.º 6
0
 def teardown_method(self):
     """Delete the environments recorded in ``self.environment_ids``.

     Does nothing when ``check_docker_inactive(test_datmo_dir)`` is truthy
     or when the freshly built project controller is not initialized.
     Raises a bare ``Exception`` if any deletion reports failure.
     """
     if check_docker_inactive(test_datmo_dir):
         return
     self.project_controller = ProjectController()
     if not self.project_controller.is_initialized:
         return
     self.environment_controller = EnvironmentController()
     # Go through a set so a repeated id is only deleted once
     for environment_id in set(self.environment_ids):
         deleted = self.environment_controller.delete(environment_id)
         if not deleted:
             raise Exception
Ejemplo n.º 7
0
 def setup_method(self):
     """Create a fresh temp directory plus project/environment controllers on it."""
     # Provide a mountable tmp directory for docker: use /tmp everywhere
     # except on Windows, where the tempfile default is kept
     on_windows = platform.system() == "Windows"
     tempfile.tempdir = None if on_windows else "/tmp"
     # TEST_DATMO_DIR, when set, overrides the parent of the temp directory
     parent_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
     self.temp_dir = tempfile.mkdtemp(dir=parent_dir)
     self.project = ProjectController(self.temp_dir)
     self.environment = EnvironmentController(self.temp_dir)
Ejemplo n.º 8
0
    def __init__(self):
        """Initialise the task controller, its sub-controllers and a spinner.

        Raises:
            ProjectNotInitialized: when ``self.is_initialized`` is falsy once
                the attributes have been set up.
        """
        super(TaskController, self).__init__()
        self.environment = EnvironmentController()
        self.snapshot = SnapshotController()
        self.spinner = Spinner()

        if self.is_initialized:
            return
        raise ProjectNotInitialized(
            __("error", "controller.task.__init__"))
Ejemplo n.º 9
0
 def update(self, **kwargs):
     """Update an environment's name and/or description.

     Reads the ``id``, ``name`` and ``description`` keywords (the latter two
     default to None) and returns whatever the controller's ``update``
     returns.
     """
     self.environment_controller = EnvironmentController()
     return self.environment_controller.update(
         kwargs.get('id'),
         name=kwargs.get('name', None),
         description=kwargs.get("description", None))
Ejemplo n.º 10
0
    def test_setup(self):
        """Exercise ``EnvironmentController.setup`` success and failure paths.

        Covers: two consecutive successful setups of the same supported
        environment, a failure when no name is given, and a failure when the
        generated Dockerfile has been modified afterwards.
        """
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()

        options = {"name": "xgboost:cpu"}
        # Pass 1: success with no files present.
        # Pass 2: success again with the files present, but staged.
        for _ in range(2):
            result = self.environment_controller.setup(options=options)
            output_definition_filepath = os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile")

            assert isinstance(result, Environment)
            assert result.name == options['name']
            assert result.description == "supported base environment created by datmo"
            assert os.path.isfile(output_definition_filepath)
            # Read through a context manager so the file handle is closed
            # deterministically instead of being leaked
            with open(output_definition_filepath, "r") as f:
                definition_content = f.read()
            assert "FROM datmo/xgboost:cpu" in definition_content

        # Test failure in downstream function (e.g. bad inputs, no name given)
        failed = False
        try:
            self.environment_controller.setup(options={})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Change environment file
        with open(output_definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Test failure setup (unstaged changes)
        failed = False
        try:
            self.environment_controller.setup(options=options)
        except UnstagedChanges:
            failed = True
        assert failed
Ejemplo n.º 11
0
 def __setup(self):
     """Initialise the project and write the fixture files the tests rely on.

     Writes ``test.txt`` into ``self.temp_dir`` and, inside the file driver's
     environment directory, a ``test`` file and a ``Dockerfile``.
     """
     self.project_controller.init("test_setup", "test description")
     self.environment_controller = EnvironmentController()

     project_file = os.path.join(self.temp_dir, "test.txt")
     with open(project_file, "wb") as handle:
         handle.write(to_bytes("hello"))

     environment_dir = \
         self.environment_controller.file_driver.environment_directory

     self.random_filepath = os.path.join(environment_dir, "test")
     with open(self.random_filepath, "wb") as handle:
         handle.write(to_bytes("cool"))

     self.definition_filepath = os.path.join(environment_dir, "Dockerfile")
     with open(self.definition_filepath, "wb") as handle:
         handle.write(to_bytes("FROM python:3.5-alpine"))
Ejemplo n.º 12
0
 def create(self, **kwargs):
     """Create an environment from the given keyword options and report it.

     Echoes an "already exists" message when the created object compares
     equal to an entry of the controller's environment list, otherwise a
     success message. Returns the created environment object either way.
     """
     self.environment_controller = EnvironmentController()
     self.cli_helper.echo(__("info", "cli.environment.create"))
     created_environment_obj = self.environment_controller.create(kwargs)
     # NOTE(review): the list is fetched after create(); if it includes the
     # object just created, the "alreadyexist" message is always chosen -
     # confirm against EnvironmentController.create/list.
     already_listed = any(
         created_environment_obj == listed_obj
         for listed_obj in self.environment_controller.list())
     if already_listed:
         message_key = "cli.environment.create.alreadyexist"
     else:
         message_key = "cli.environment.create.success"
     self.cli_helper.echo(
         __("info", message_key, created_environment_obj.id))
     return created_environment_obj
Ejemplo n.º 13
0
 def test_init_fail_project_not_init(self):
     """Constructing the controller after only setting the home must raise."""
     Config().set_home(self.temp_dir)
     try:
         EnvironmentController()
     except ProjectNotInitialized:
         failed = True
     else:
         failed = False
     assert failed
Ejemplo n.º 14
0
 def setup(self, **kwargs):
     """Set up an environment, prompting for any missing or invalid option.

     Reads the ``type``, ``framework`` and ``language`` keywords. Each value
     that is missing, or not among the options the controller reports as
     available, is replaced by the user's answer to a prompt.

     Returns:
         The environment object returned by the controller's ``setup``, or
         None (after echoing an error) when it raises
         ``EnvironmentDoesNotExist``.
     """
     self.environment_controller = EnvironmentController()
     environment_type = kwargs.get("type", None)
     environment_framework = kwargs.get("framework", None)
     environment_language = kwargs.get("language", None)
     # TODO: remove business logic from here and create common helper
     # environment types
     environment_types = self.environment_controller.get_environment_types()
     if not environment_type or environment_type not in environment_types:
         environment_type = self.cli_helper.prompt_available_options(
             environment_types, option_type="type")
     # environment frameworks
     available_framework_details = self.environment_controller.get_supported_frameworks(
         environment_type)
     available_frameworks = [
         item[0] for item in available_framework_details
     ]
     if not environment_framework or environment_framework not in available_frameworks:
         environment_framework = self.cli_helper.prompt_available_options(
             available_framework_details, option_type="framework")
     # environment languages
     available_environment_languages = self.environment_controller.get_supported_languages(
         environment_type, environment_framework)
     # Only prompt when there is something to choose from. The parentheses
     # matter: without them `and` binds tighter than `or`, so an empty
     # language list would still trigger a prompt with no options (and a
     # None list would raise TypeError on the `not in` test).
     if available_environment_languages and (
             not environment_language or
             environment_language not in available_environment_languages):
         environment_language = self.cli_helper.prompt_available_options(
             available_environment_languages, option_type="language")
     try:
         options = {
             "environment_type": environment_type,
             "environment_framework": environment_framework,
             "environment_language": environment_language
         }
         environment_obj = self.environment_controller.setup(
             options=options)
         self.cli_helper.echo(
             __("info", "cli.environment.setup.success",
                (environment_obj.name, environment_obj.id)))
         return environment_obj
     except EnvironmentDoesNotExist:
         self.cli_helper.echo(
             __(
                 "error", "cli.environment.setup.argument",
                 "%s:%s-%s" % (environment_framework, environment_type,
                               environment_language)))
Ejemplo n.º 15
0
 def setup(self, **kwargs):
     """Set up a supported environment chosen by its ``name`` keyword.

     When no name is supplied the user is prompted with the environments the
     controller reports as supported.

     Returns:
         The environment object returned by the controller's ``setup``, or
         None (after echoing an error) when it raises
         ``EnvironmentDoesNotExist``.
     """
     self.environment_controller = EnvironmentController()
     controller = self.environment_controller
     name = kwargs.get("name", None)
     available_environments = controller.get_supported_environments()
     if not name:
         name = self.cli_helper.prompt_available_environments(
             available_environments)
     try:
         environment_obj = controller.setup(options={"name": name})
         success_message = __("info", "cli.environment.setup.success",
                              (environment_obj.name, environment_obj.id))
         self.cli_helper.echo(success_message)
         return environment_obj
     except EnvironmentDoesNotExist:
         self.cli_helper.echo(
             __("error", "cli.environment.setup.argument", name))
         return None
Ejemplo n.º 16
0
 def ls(self, **kwargs):
     """List the project's environments and print them via the CLI helper.

     Keywords read: ``format`` (defaults to "table"), ``download`` and
     ``download_path``. When ``download`` is truthy the items are also
     written to ``download_path``; if no path is given one is generated
     under the controller's home from the current UTC time in milliseconds.

     Returns:
         The environment objects returned by the controller's ``list``.
     """
     self.environment_controller = EnvironmentController()
     print_format = kwargs.get('format', "table")
     download = kwargs.get('download', None)
     download_path = kwargs.get('download_path', None)
     environment_objs = self.environment_controller.list()
     header_list = ["id", "created at", "name", "description"]

     # One row per environment, keyed by the header names
     item_dict_list = []
     for env in environment_objs:
         shown_name = printable_object(env.name)
         shown_description = printable_object(env.description)
         row = {
             "id": env.id,
             "created at": prettify_datetime(env.created_at),
             "name": shown_name,
             "description": shown_description
         }
         item_dict_list.append(row)

     if not download:
         self.cli_helper.print_items(header_list,
                                     item_dict_list,
                                     print_format=print_format)
         return environment_objs

     if not download_path:
         # Default target: a timestamped file name under the controller home
         now = datetime.utcnow()
         epoch = datetime.utcfromtimestamp(0)
         millis_since_epoch = (now - epoch).total_seconds() * 1000.0
         download_path = os.path.join(
             self.environment_controller.home,
             "environment_ls_" + str(millis_since_epoch))
     self.cli_helper.print_items(header_list,
                                 item_dict_list,
                                 print_format=print_format,
                                 output_path=download_path)
     return environment_objs
Ejemplo n.º 17
0
    def test_stop_failure(self):
        """``stop`` must reject both missing and conflicting arguments.

        1) No arguments -> RequiredArgumentMissing
        2) Both ``run_id`` and ``match_string`` -> TooManyArgumentsFound
        """
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 1) Calling stop() with nothing to identify the run
        try:
            self.environment_controller.stop()
        except RequiredArgumentMissing:
            failed = True
        else:
            failed = False
        assert failed

        # 2) Calling stop() with two selectors at once
        try:
            self.environment_controller.stop(run_id="hello",
                                             match_string="there")
        except TooManyArgumentsFound:
            failed = True
        else:
            failed = False
        assert failed
Ejemplo n.º 18
0
    def test_extract_workspace_url(self):
        """With no container running, the extracted workspace URL equals None."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # Write a two-line Dockerfile; the second line echoes a fresh UUID
        dockerfile_path = os.path.join(controller.home, "Dockerfile")
        random_text = str(uuid.uuid1())
        base_image_line = to_bytes(
            "FROM datmo/python-base:cpu-py27-notebook" + os.linesep)
        with open(dockerfile_path, "wb") as dockerfile:
            dockerfile.write(base_image_line)
            dockerfile.write(to_bytes(str("RUN echo " + random_text)))

        image_name = "test"
        # Create and build the environment in the project
        environment_obj = controller.create(
            {"name": image_name, "description": "test description"},
            save_hardware_file=False)
        controller.build(environment_obj.id)

        # No container has been started for this image
        workspace_url = controller.extract_workspace_url(
            image_name, "notebook")
        assert workspace_url == None
Ejemplo n.º 19
0
    def test_list(self):
        """Two created environments must both appear in the controller's list."""
        self.project_controller.init("test4", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # First environment: definition stored as <home>/Dockerfile
        first_definition = os.path.join(controller.home, "Dockerfile")
        with open(first_definition, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))
        first_env = controller.create({"paths": [first_definition]})

        # Second environment: definition stored as <home>/Dockerfile2 and
        # passed using the "<source>>Dockerfile" path form
        second_definition = os.path.join(controller.home, "Dockerfile2")
        with open(second_definition, "wb") as handle:
            handle.write(to_bytes("FROM python:3.4-alpine"))
        second_env = controller.create(
            {"paths": [second_definition + ">Dockerfile"]})

        # List all environments and ensure both exist
        result = controller.list()

        assert len(result) == 2
        assert first_env in result
        assert second_env in result
Ejemplo n.º 20
0
    def test_update(self):
        """Updating works for an existing id and raises for an unknown one."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        controller = self.environment_controller

        # Create the environment from a one-line Dockerfile in the home dir
        dockerfile_path = os.path.join(controller.home, "Dockerfile")
        with open(dockerfile_path, "wb") as dockerfile:
            dockerfile.write(to_bytes("FROM python:3.5-alpine"))
        environment_obj = controller.create({"paths": [dockerfile_path]})

        # Successful update returns the Environment with the new values
        new_name = "test name"
        new_description = "test description"
        result = controller.update(
            environment_obj.id, name=new_name, description=new_description)
        assert result
        assert isinstance(result, Environment)
        assert result.name == new_name
        assert result.description == new_description

        # Updating an id that does not exist must raise
        try:
            controller.update("random_id",
                              name=new_name,
                              description=new_description)
        except EnvironmentDoesNotExist:
            failed = True
        else:
            failed = False
        assert failed
Ejemplo n.º 21
0
class TestTaskController():
    def setup_method(self):
        """Create a temp project directory, initialise it and build controllers."""
        # Provide a mountable tmp directory for docker: use /tmp everywhere
        # except on Windows, where the tempfile default is kept
        on_windows = platform.system() == "Windows"
        tempfile.tempdir = None if on_windows else "/tmp"
        # TEST_DATMO_DIR, when set, overrides the parent of the temp directory
        parent_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=parent_dir)
        self.project = ProjectController(self.temp_dir)
        self.project.init("test", "test description")
        self.environment = EnvironmentController(self.temp_dir)
        self.task = TaskController(self.temp_dir)

    def teardown_method(self):
        """Intentionally a no-op: nothing is cleaned up after each test."""

    def test_create(self):
        """A created task keeps the command and gpu flag it was given."""
        expected_command = ["sh", "-c", "echo accuracy:0.45"]
        expected_gpu = False

        # Create task in the project
        task_obj = self.task.create({
            "command": expected_command,
            "gpu": expected_gpu
        })

        assert task_obj
        assert task_obj.command == expected_command
        assert task_obj.gpu == expected_gpu

    def test_run_helper(self):
        """Run ``_run_helper`` twice: once with "api" False, once with True."""
        # TODO: Try out more options (see below)
        # Create the environment from a Dockerfile in the project home
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        environment_obj = self.environment.create({
            "definition_filepath": env_def_path
        })

        # Set log filepath
        log_filepath = os.path.join(self.task.home, "test.log")

        # Create volume to mount
        temp_test_dirpath = os.path.join(self.temp_dir, "temp")
        os.makedirs(temp_test_dirpath)

        # Option set 1 uses api=False, option set 2 uses api=True; every
        # other option is identical apart from the random container name
        for api_flag in (False, True):
            container_name = ''.join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(32))
            options_dict = {
                "command": ["sh", "-c", "echo accuracy:0.45"],
                "ports": ["8888:8888"],
                "gpu": False,
                "name": container_name,
                "volumes": {
                    temp_test_dirpath: {
                        'bind': '/task/',
                        'mode': 'rw'
                    }
                },
                "detach": False,
                "stdin_open": True,
                "tty": False,
                "api": api_flag
            }

            return_code, run_id, logs = self.task._run_helper(
                environment_obj.id, options_dict, log_filepath)
            assert return_code == 0
            assert run_id
            assert self.task.environment_driver.get_container(run_id)
            assert logs
            assert os.path.exists(log_filepath)
            # Remove the container before the next option set runs
            self.task.environment_driver.stop_remove_containers_by_term(
                term=container_name)

    def test_parse_logs_for_results(self):
        """Only the "key : value" log lines end up in the parsed results."""
        test_logs = """
        this is a log
        accuracy is good
        accuracy : 0.94
        this did not work
        validation : 0.32
        model_type : logistic regression
        """
        result = self.task._parse_logs_for_results(test_logs)

        assert isinstance(result, dict)
        expected_pairs = (
            ('accuracy', "0.94"),
            ('validation', "0.32"),
            ('model_type', "logistic regression"),
        )
        for key, expected_value in expected_pairs:
            assert result[key] == expected_value

    def test_run(self):
        """Exercise ``TaskController.run`` across success and failure cases.

        The scenarios run in sequence against the same project and depend on
        state left behind by the earlier ones (for example the task that has
        already been run in scenario 1 is re-run in scenarios 2 and 3).
        """
        # 1) Test success case with default values and env def file
        # 2) Test failure case if running same task (conflicting containers)
        # 3) Test failure case if running same task with snapshot_dict (conflicting containers)
        #    (followed by an unnumbered failure case: task already marked RUNNING)
        # 4) Test success case with snapshot_dict
        # 5) Test success case with saved file during task run

        # TODO: look into log filepath randomness, sometimes logs are not written
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create task in the project
        task_obj = self.task.create(input_dict)

        # Create environment definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 1) Test option 1
        updated_task_obj = self.task.run(task_obj.id)

        # run() returns the same task, now populated with run metadata
        assert task_obj.id == updated_task_obj.id

        assert updated_task_obj.before_snapshot_id
        assert updated_task_obj.ports == None
        assert updated_task_obj.gpu == False
        assert updated_task_obj.interactive == False
        assert updated_task_obj.task_dirpath
        assert updated_task_obj.log_filepath
        assert updated_task_obj.start_time

        assert updated_task_obj.after_snapshot_id
        assert updated_task_obj.run_id
        assert updated_task_obj.logs
        assert "accuracy" in updated_task_obj.logs
        assert updated_task_obj.results
        assert updated_task_obj.results == {"accuracy": "0.45"}
        assert updated_task_obj.status == "SUCCESS"
        assert updated_task_obj.end_time
        assert updated_task_obj.duration

        # 2) Test option 2
        # Re-running the task that has already been run must raise
        failed = False
        try:
             self.task.run(task_obj.id)
        except TaskRunException:
            failed = True
        assert failed

        # 3) Test option 3

        # Create files to add
        self.project.file_driver.create("dirpath1", directory=True)
        self.project.file_driver.create("dirpath2", directory=True)
        self.project.file_driver.create("filepath1")

        # Snapshot dictionary
        snapshot_dict = {
            "filepaths": [os.path.join(self.project.home, "dirpath1"),
                          os.path.join(self.project.home, "dirpath2"),
                          os.path.join(self.project.home, "filepath1")],
        }

        # Run a basic task in the project
        # (same already-run task as above, this time with a snapshot_dict)
        failed = False
        try:
            self.task.run(task_obj.id,
                          snapshot_dict=snapshot_dict)
        except TaskRunException:
            failed = True
        assert failed

        # Test when the specific task id is already RUNNING
        # Create task in the project
        task_obj_1 = self.task.create(input_dict)
        # Mark the new task as RUNNING directly in the data store
        self.task.dal.task.update({"id": task_obj_1.id, "status": "RUNNING"})
        # Create environment_driver definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        failed = False
        try:
            self.task.run(task_obj_1.id)
        except TaskRunException:
            failed = True
        assert failed

        # 4) Test option 4

        # Create a new task in the project
        task_obj_2 = self.task.create(input_dict)

        # Run another task in the project
        updated_task_obj_2 = self.task.run(task_obj_2.id,
                                           snapshot_dict=snapshot_dict)

        assert task_obj_2.id == updated_task_obj_2.id

        assert updated_task_obj_2.before_snapshot_id
        assert updated_task_obj_2.ports == None
        assert updated_task_obj_2.gpu == False
        assert updated_task_obj_2.interactive == False
        assert updated_task_obj_2.task_dirpath
        assert updated_task_obj_2.log_filepath
        assert updated_task_obj_2.start_time

        assert updated_task_obj_2.after_snapshot_id
        assert updated_task_obj_2.run_id
        assert updated_task_obj_2.logs
        assert "accuracy" in updated_task_obj_2.logs
        assert updated_task_obj_2.results
        assert updated_task_obj_2.results == {"accuracy": "0.45"}
        assert updated_task_obj_2.status == "SUCCESS"
        assert updated_task_obj_2.end_time
        assert updated_task_obj_2.duration

        # 5) Test option 5

        # Create a basic script
        # (fails w/ no environment)
        # The script prints an accuracy line and appends to /task/new_file.txt
        test_filepath = os.path.join(self.temp_dir, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import os\n"))
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))
            f.write(to_unicode("print(' accuracy: 0.56 ')\n"))
            f.write(to_unicode("with open(os.path.join('/task', 'new_file.txt'), 'a') as f:\n"))
            f.write(to_unicode("    f.write('my test file')\n"))

        task_command = ["python", "script.py"]
        input_dict = {
            "command": task_command
        }

        # Create task in the project
        # (rebinds task_obj_2 / updated_task_obj_2 from scenario 4)
        task_obj_2 = self.task.create(input_dict)

        # Create environment definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        updated_task_obj_2 = self.task.run(task_obj_2.id)

        assert updated_task_obj_2.before_snapshot_id
        assert updated_task_obj_2.ports == None
        assert updated_task_obj_2.gpu == False
        assert updated_task_obj_2.interactive == False
        assert updated_task_obj_2.task_dirpath
        assert updated_task_obj_2.log_filepath
        assert updated_task_obj_2.start_time

        assert updated_task_obj_2.after_snapshot_id
        assert updated_task_obj_2.run_id
        assert updated_task_obj_2.logs
        assert "accuracy" in updated_task_obj_2.logs
        assert updated_task_obj_2.results
        assert updated_task_obj_2.results == {"accuracy": "0.56"}
        assert updated_task_obj_2.status == "SUCCESS"
        assert updated_task_obj_2.end_time
        assert updated_task_obj_2.duration

        # test if after snapshot has the file written
        after_snapshot_obj = self.task.dal.snapshot.get_by_id(
            updated_task_obj_2.after_snapshot_id
        )
        file_collection_obj = self.task.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id
        )
        files_absolute_path = os.path.join(self.task.home, file_collection_obj.path)

        # Both the task log and the file written by the script must be present
        assert os.path.isfile(os.path.join(files_absolute_path, "task.log"))
        assert os.path.isfile(os.path.join(files_absolute_path, "new_file.txt"))

    def test_list(self):
        """Both created tasks are listed, with and without a session filter."""
        input_dict = {"command": ["sh", "-c", "echo accuracy:0.45"]}

        # Create tasks in the project
        first_task = self.task.create(input_dict)
        second_task = self.task.create(input_dict)

        # List all tasks regardless of filters
        result = self.task.list()

        assert len(result) == 2
        assert first_task in result
        assert second_task in result

        # List all tasks and filter by the project's current session
        current_session_id = self.project.current_session.id
        result = self.task.list(session_id=current_session_id)

        assert len(result) == 2
        assert first_task in result
        assert second_task in result

    def test_get_files(self):
        """``get_files`` returns the log and snapshot file, opened in the requested mode."""
        input_dict = {"command": ["sh", "-c", "echo accuracy:0.45"]}

        # Create task in the project
        task_obj = self.task.create(input_dict)

        # Create environment definition
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # Create file to add
        self.project.file_driver.create("dirpath1", directory=True)
        self.project.file_driver.create(os.path.join("dirpath1", "filepath1"))

        # Snapshot dictionary
        snapshot_dict = {
            "filepaths": [
                os.path.join(self.project.home, "dirpath1", "filepath1")
            ],
        }

        # Test the default values
        updated_task_obj = self.task.run(task_obj.id,
                                         snapshot_dict=snapshot_dict)

        # TODO: Test case for during run and before_snapshot run
        # Get files for the task after run is complete (default)
        result = self.task.get_files(updated_task_obj.id)

        after_snapshot_obj = self.task.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        file_collection_obj = self.task.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id)

        def check_files(file_objs, expected_mode):
            # Exactly two text files are expected, in this order, located in
            # the collection directory named after the file collection hash
            assert len(file_objs) == 2
            for index, filename in enumerate(("task.log", "filepath1")):
                file_obj = file_objs[index]
                assert isinstance(file_obj, TextIOWrapper)
                assert file_obj.name == os.path.join(
                    self.task.home, ".datmo", "collections",
                    file_collection_obj.filehash, filename)
                assert file_obj.mode == expected_mode

        check_files(result, "r")

        # Get files for the task after run is complete for different mode
        result = self.task.get_files(updated_task_obj.id, mode="a")
        check_files(result, "a")

    def test_delete(self):
        """Deleting a task makes it unretrievable from the task data store."""
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create tasks in the project
        task_obj = self.task.create(input_dict)

        # Delete task from the project
        result = self.task.delete(task_obj.id)

        # Check if task retrieval throws error. The lookup must go through
        # the task DAL: querying the snapshot DAL with a task id would raise
        # EntityNotFound whether or not the delete worked, making the check
        # vacuous.
        thrown = False
        try:
            self.task.dal.task.get_by_id(task_obj.id)
        except EntityNotFound:
            thrown = True

        assert result == True and \
               thrown == True

    def test_stop(self):
        """Run a task, stop it, and check that ``stop`` reports success."""
        input_dict = {"command": ["sh", "-c", "echo accuracy:0.45"]}

        # Create task in the project
        task_obj = self.task.create(input_dict)

        # Create environment driver definition
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # Run with the default values, then stop the task
        updated_task_obj = self.task.run(task_obj.id)
        result = self.task.stop(updated_task_obj.id)

        # NOTE(review): this looks a task id up in the *snapshot* DAL, so
        # EntityNotFound is presumably raised regardless of what stop() did -
        # confirm what this check is meant to verify.
        try:
            self.task.dal.snapshot.get_by_id(task_obj.id)
        except EntityNotFound:
            thrown = True
        else:
            thrown = False

        assert result == True
        assert thrown == True
Ejemplo n.º 22
0
class TestProjectController():
    """Tests for ProjectController init, cleanup and status.

    Every test runs against a fresh temporary directory (created under
    ``test_datmo_dir``) that is registered as the datmo home.
    """

    def setup_method(self):
        """Point the datmo home at a new temp dir and build the controller."""
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()
        # Environment ids recorded by tests so teardown can delete them
        self.environment_ids = []

    def teardown_method(self):
        """Delete every environment recorded by the test.

        Only runs when docker is active and the project is initialized.

        Raises
        ------
        Exception
            if an environment could not be deleted
        """
        if not check_docker_inactive(test_datmo_dir):
            self.project_controller = ProjectController()
            if self.project_controller.is_initialized:
                self.environment_controller = EnvironmentController()
                # De-duplicate so each environment is deleted only once
                for env_id in set(self.environment_ids):
                    if not self.environment_controller.delete(env_id):
                        raise Exception(
                            "could not delete environment " + str(env_id))

    def test_init_failure_none(self):
        """init(None, None) must raise ValidationFailed."""
        # Test failed case
        failed = False
        try:
            self.project_controller.init(None, None)
        except ValidationFailed:
            failed = True
        assert failed

    def test_init_failure_empty_str(self):
        """init("", "") must raise and leave the drivers uninitialized."""
        # Test failed case
        failed = False
        try:
            self.project_controller.init("", "")
        except ValidationFailed:
            failed = True
        assert failed
        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized

    def test_init_failure_git_code_driver(self):
        """A failing git code driver init must leave nothing initialized.

        Only exercised when the configured code driver type is "git".
        """
        # Create a HEAD.lock file in .git to make GitCodeDriver.init() fail
        if self.project_controller.code_driver.type == "git":
            git_dir = os.path.join(
                self.project_controller.code_driver.filepath, ".git")
            os.makedirs(git_dir)
            # Binary mode, like the other to_bytes() writes in these tests;
            # a text-mode handle rejects bytes on Python 3
            with open(os.path.join(git_dir, "HEAD.lock"), "ab") as f:
                f.write(to_bytes("test"))
            failed = False
            try:
                self.project_controller.init("test1", "test description")
            except Exception:
                failed = True
            assert failed
            assert not self.project_controller.code_driver.is_initialized
            assert not self.project_controller.file_driver.is_initialized

    def test_init_success(self):
        """A valid init stores name/description and starts a default session."""
        result = self.project_controller.init("test1", "test description")

        # Tested with is_initialized
        assert self.project_controller.model.name == "test1"
        assert self.project_controller.model.description == "test description"
        assert result and self.project_controller.is_initialized

        # Changeable by user, not tested in is_initialized
        assert self.project_controller.current_session.name == "default"

    # TODO: Test lower level functions (DAL, JSONStore, etc for interruptions)
    # def test_init_with_interruption(self):
    #     # Reinitializing after timed interruption during init
    #     @timeout_decorator.timeout(0.001, use_signals=False)
    #     def timed_init_with_interruption():
    #         result = self.project_controller.init("test1", "test description")
    #         return result
    #
    #     failed = False
    #     try:
    #         timed_init_with_interruption()
    #     except timeout_decorator.timeout_decorator.TimeoutError:
    #         failed = True
    #     # Tested with is_initialized
    #     assert failed
    #
    #     # Reperforming init after a wait of 2 seconds
    #     time.sleep(2)
    #     result = self.project_controller.init("test2", "test description")
    #     # Tested with is_initialized
    #     assert self.project_controller.model.name == "test2"
    #     assert self.project_controller.model.description == "test description"
    #     assert result and self.project_controller.is_initialized
    #
    #     # Changeable by user, not tested in is_initialized
    #     assert self.project_controller.current_session.name == "default"

    def test_init_reinit_failure_empty_str(self):
        """A failed re-init with empty strings must keep the existing project."""
        _ = self.project_controller.init("test1", "test description")
        # Start from False so the assertion only passes when init() raises
        failed = False
        try:
            self.project_controller.init("", "")
        except Exception:
            failed = True
        assert failed
        assert self.project_controller.model.name == "test1"
        assert self.project_controller.model.description == "test description"
        assert self.project_controller.code_driver.is_initialized
        assert self.project_controller.file_driver.is_initialized

    def test_init_reinit_success(self):
        """Re-running init with new values updates name and description."""
        _ = self.project_controller.init("test1", "test description")
        # Test out functionality for re-initialize project
        result = self.project_controller.init("anything", "else")

        assert self.project_controller.model.name == "anything"
        assert self.project_controller.model.description == "else"
        assert result == True

    def test_cleanup_no_environment(self):
        """cleanup() un-initializes the code and file drivers."""
        self.project_controller.init("test2", "test description")
        result = self.project_controller.cleanup()

        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized
        # Ensure that containers built with this image do not exist
        # assert not self.project_controller.environment_driver.list_containers(filters={
        #     "ancestor": image_id
        # })
        assert result == True

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_cleanup_with_environment(self):
        """cleanup() also leaves no "datmo-test2" images behind."""
        self.project_controller.init("test2", "test description")
        result = self.project_controller.cleanup()

        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized
        assert not self.project_controller.environment_driver.list_images(
            "datmo-test2")
        # Ensure that containers built with this image do not exist
        # assert not self.project_controller.environment_driver.list_containers(filters={
        #     "ancestor": image_id
        # })
        assert result == True

    def test_status_basic(self):
        """status() on a freshly initialized project reports no snapshots."""
        self.project_controller.init("test3", "test description")
        (status_dict, current_snapshot, latest_snapshot_user_generated,
         latest_snapshot_auto_generated, unstaged_code, unstaged_environment,
         unstaged_files) = self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test3"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert not current_snapshot
        assert not latest_snapshot_user_generated
        assert not latest_snapshot_auto_generated
        assert unstaged_code  # no files, but unstaged because blank commit id has not yet been created (no initial snapshot)
        assert not unstaged_environment
        assert not unstaged_files

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_status_snapshot_task(self):
        """status() after a user snapshot and again after a task run.

        First a snapshot is created by hand and status is checked; then a task
        is created and run, and status is checked against the task's before
        and after snapshots.
        """
        self.project_controller.init("test4", "test description")
        self.snapshot_controller = SnapshotController()
        self.task_controller = TaskController()

        # Create files to add
        self.snapshot_controller.file_driver.create("dirpath1", directory=True)
        self.snapshot_controller.file_driver.create("dirpath2", directory=True)
        self.snapshot_controller.file_driver.create("filepath1")

        # Create environment definition
        env_def_path = os.path.join(self.snapshot_controller.home,
                                    "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        environment_paths = [env_def_path]

        # Create config
        config_filepath = os.path.join(self.snapshot_controller.home,
                                       "config.json")
        with open(config_filepath, "wb") as f:
            f.write(to_bytes(str("{}")))

        # Create stats
        stats_filepath = os.path.join(self.snapshot_controller.home,
                                      "stats.json")
        with open(stats_filepath, "wb") as f:
            f.write(to_bytes(str("{}")))

        input_dict = {
            "message":
                "my test snapshot",
            "paths": [
                os.path.join(self.snapshot_controller.home, "dirpath1"),
                os.path.join(self.snapshot_controller.home, "dirpath2"),
                os.path.join(self.snapshot_controller.home, "filepath1")
            ],
            "environment_paths":
                environment_paths,
            "config_filename":
                config_filepath,
            "stats_filename":
                stats_filepath,
        }

        # Create snapshot in the project, then wait, and try status
        first_snapshot = self.snapshot_controller.create(input_dict)

        (status_dict, current_snapshot, latest_snapshot_user_generated,
         latest_snapshot_auto_generated, unstaged_code, unstaged_environment,
         unstaged_files) = self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert not current_snapshot  # snapshot was created from other environments and files (so user is not on any current snapshot)
        assert isinstance(latest_snapshot_user_generated, Snapshot)
        assert latest_snapshot_user_generated == first_snapshot
        assert not latest_snapshot_auto_generated
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files

        # Create and run a task and test if task is shown
        first_task = self.task_controller.create()

        # Create task_dict
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        updated_first_task = self.task_controller.run(
            first_task.id, task_dict=task_dict)
        before_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_first_task.before_snapshot_id)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_first_task.after_snapshot_id)
        before_environment_obj = self.task_controller.dal.environment.get_by_id(
            before_snapshot_obj.environment_id)
        after_environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        assert before_environment_obj == after_environment_obj
        # Record the environment so teardown_method deletes it
        self.environment_ids.append(after_environment_obj.id)

        (status_dict, current_snapshot, latest_snapshot_user_generated,
         latest_snapshot_auto_generated, unstaged_code, unstaged_environment,
         unstaged_files) = self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert isinstance(current_snapshot, Snapshot)
        assert isinstance(latest_snapshot_user_generated, Snapshot)
        assert latest_snapshot_user_generated == first_snapshot
        assert isinstance(latest_snapshot_auto_generated, Snapshot)
        # current snapshot is the before snapshot for the run
        assert current_snapshot == before_snapshot_obj
        assert current_snapshot != latest_snapshot_auto_generated
        assert current_snapshot != latest_snapshot_user_generated
        # latest autogenerated snapshot is the after snapshot id
        assert latest_snapshot_auto_generated == after_snapshot_obj
        assert latest_snapshot_auto_generated != latest_snapshot_user_generated
        # user generated snapshot is not associated with any before or after snapshot
        assert latest_snapshot_user_generated != before_snapshot_obj
        assert latest_snapshot_user_generated != after_snapshot_obj
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files
Ejemplo n.º 23
0
    def init(self, name, description):
        """Create a new project or update the existing one, interactively.

        Whichever of ``name`` / ``description`` is empty is requested from the
        user through the CLI helper. Once the project controller has been
        initialized, the project metadata is echoed and the user is offered an
        environment setup.

        Parameters
        ----------
        name : str
            name for the project
        description : str
            description of the project

        Returns
        -------
        datmo.core.entity.model.Model
            the project model, or None when initializing the project (or
            reporting its success) raised an exception
        """
        controller = self.project_controller
        cli = self.cli_helper

        if controller.model:
            # A model is already present: update the current project
            success_key = "cli.project.init.update.success"
            failure_key = "cli.project.init.update.failure"
            cli.echo(
                __("info", "cli.project.init.update", {
                    "name": controller.model.name,
                    "path": controller.home
                }))
            # The stored values are offered as the prompt defaults
            if not name:
                name = cli.prompt(
                    __("prompt", "cli.project.init.name"),
                    default=controller.model.name)
            if not description:
                description = cli.prompt(
                    __("prompt", "cli.project.init.description"),
                    default=controller.model.description)
        else:
            # No model yet: initialize a new project
            success_key = "cli.project.init.create.success"
            failure_key = "cli.project.init.create.failure"
            cli.echo(
                __("info", "cli.project.init.create",
                   {"path": controller.home}))
            if not name:
                # The last component of the home path is the suggested name
                default_name = os.path.split(controller.home)[1]
                name = cli.prompt(
                    __("prompt", "cli.project.init.name"),
                    default=default_name)
            if not description:
                description = cli.prompt(
                    __("prompt", "cli.project.init.description"))

        # Create or update the project with the values given
        try:
            if controller.init(name, description):
                cli.echo(
                    __("info", success_key, {
                        "name": name,
                        "path": controller.home
                    }))
        except Exception:
            cli.echo(
                __("info", failure_key, {
                    "name": name,
                    "path": controller.home
                }))
            return None

        cli.echo("")

        # Print out simple project meta data (everything except the config)
        for key, value in controller.model.to_dictionary().items():
            if key == "config":
                continue
            cli.echo(str(key) + ": " + str(value))

        # Ask question if the user would like to setup environment
        if not cli.prompt_bool(
                __("prompt", "cli.project.environment.setup")):
            cli.echo(
                "there was no environment setup. you can get information"
                " here: https://datmo.readthedocs.io/en/latest/env-setup.html")
            return controller.model

        # TODO: remove business logic from here and create common helper
        # Setting up the environment definition file
        self.environment_controller = EnvironmentController()
        env_controller = self.environment_controller

        # Each choice narrows the options offered for the next one
        chosen_type = cli.prompt_available_options(
            env_controller.get_environment_types(), option_type="type")
        chosen_framework = cli.prompt_available_options(
            env_controller.get_supported_frameworks(chosen_type),
            option_type="framework")
        chosen_language = cli.prompt_available_options(
            env_controller.get_supported_languages(chosen_type,
                                                   chosen_framework),
            option_type="language")

        environment_obj = env_controller.setup(
            options={
                "environment_type": chosen_type,
                "environment_framework": chosen_framework,
                "environment_language": chosen_language
            })
        cli.echo(
            __("info", "cli.environment.setup.success",
               (environment_obj.name, environment_obj.id)))

        return controller.model
Ejemplo n.º 24
0
class ProjectCommand(BaseCommand):
    def __init__(self, cli_helper):
        """Initialize the base command and create the project controller.

        Parameters
        ----------
        cli_helper
            helper object forwarded to the BaseCommand constructor
        """
        super(ProjectCommand, self).__init__(cli_helper)
        self.project_controller = ProjectController()

    def init(self, name, description):
        """Create a new project or update the existing one, interactively.

        Whichever of ``name`` / ``description`` is empty is requested from the
        user through the CLI helper. Once the project controller has been
        initialized, the project metadata is echoed and the user is offered an
        environment setup.

        Parameters
        ----------
        name : str
            name for the project
        description : str
            description of the project

        Returns
        -------
        datmo.core.entity.model.Model
            the project model, or None when initializing the project (or
            reporting its success) raised an exception
        """
        controller = self.project_controller
        cli = self.cli_helper

        if controller.model:
            # A model is already present: update the current project
            success_key = "cli.project.init.update.success"
            failure_key = "cli.project.init.update.failure"
            cli.echo(
                __("info", "cli.project.init.update", {
                    "name": controller.model.name,
                    "path": controller.home
                }))
            # The stored values are offered as the prompt defaults
            if not name:
                name = cli.prompt(
                    __("prompt", "cli.project.init.name"),
                    default=controller.model.name)
            if not description:
                description = cli.prompt(
                    __("prompt", "cli.project.init.description"),
                    default=controller.model.description)
        else:
            # No model yet: initialize a new project
            success_key = "cli.project.init.create.success"
            failure_key = "cli.project.init.create.failure"
            cli.echo(
                __("info", "cli.project.init.create",
                   {"path": controller.home}))
            if not name:
                # The last component of the home path is the suggested name
                default_name = os.path.split(controller.home)[1]
                name = cli.prompt(
                    __("prompt", "cli.project.init.name"),
                    default=default_name)
            if not description:
                description = cli.prompt(
                    __("prompt", "cli.project.init.description"))

        # Create or update the project with the values given
        try:
            if controller.init(name, description):
                cli.echo(
                    __("info", success_key, {
                        "name": name,
                        "path": controller.home
                    }))
        except Exception:
            cli.echo(
                __("info", failure_key, {
                    "name": name,
                    "path": controller.home
                }))
            return None

        cli.echo("")

        # Print out simple project meta data (everything except the config)
        for key, value in controller.model.to_dictionary().items():
            if key == "config":
                continue
            cli.echo(str(key) + ": " + str(value))

        # Ask question if the user would like to setup environment
        if not cli.prompt_bool(
                __("prompt", "cli.project.environment.setup")):
            cli.echo(
                "there was no environment setup. you can get information"
                " here: https://datmo.readthedocs.io/en/latest/env-setup.html")
            return controller.model

        # TODO: remove business logic from here and create common helper
        # Setting up the environment definition file
        self.environment_controller = EnvironmentController()
        env_controller = self.environment_controller

        # Each choice narrows the options offered for the next one
        chosen_type = cli.prompt_available_options(
            env_controller.get_environment_types(), option_type="type")
        chosen_framework = cli.prompt_available_options(
            env_controller.get_supported_frameworks(chosen_type),
            option_type="framework")
        chosen_language = cli.prompt_available_options(
            env_controller.get_supported_languages(chosen_type,
                                                   chosen_framework),
            option_type="language")

        environment_obj = env_controller.setup(
            options={
                "environment_type": chosen_type,
                "environment_framework": chosen_framework,
                "environment_language": chosen_language
            })
        cli.echo(
            __("info", "cli.environment.setup.success",
               (environment_obj.name, environment_obj.id)))

        return controller.model

    def version(self):
        """Echo the datmo version string and return the echo call's result."""
        version_message = "datmo version: %s" % __version__
        return self.cli_helper.echo(version_message)

    @Helper.notify_no_project_found
    def status(self):
        # Echo the project status: the metadata returned by the project
        # controller, then either a "no unstaged changes" summary or the list
        # of unstaged components, then the latest snapshot information.
        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()
        # Print out simple project meta data
        for k, v in status_dict.items():
            if k != "config":
                self.cli_helper.echo(str(k) + ": " + str(v))

        self.cli_helper.echo("")

        # Print out any unstaged changes else print out the latest snapshot state of the repository
        if not unstaged_code and not unstaged_environment and not unstaged_files:
            self.cli_helper.echo(
                "all changes have been saved, no unstaged changes")
            self.cli_helper.echo("")
            self.cli_helper.echo("current snapshot state of the repository: ")
            if current_snapshot:
                self.cli_helper.echo(current_snapshot)
        else:
            # Print out the unstaged components if unstaged
            self.cli_helper.echo("unstaged changes since latest snapshot:")
            if unstaged_code:
                self.cli_helper.echo("code has been changed")
            if unstaged_environment:
                self.cli_helper.echo("environment has been changed")
            if unstaged_files:
                self.cli_helper.echo("files have been changed")

        # Print out info for the latest snapshot (the most recent first, and state if autogenerated or by user)
        if latest_snapshot_user_generated and not latest_snapshot_auto_generated:
            # NOTE(review): the next line looks damaged (text between the two
            # string literals is missing and it does not parse). The parallel
            # auto-generated branch below suggests an echo of
            # latest_snapshot_user_generated belonged here - restore from the
            # original source.
            self.cli_helper.echo("latest snapshot generated by the user: "******"no snapshot autogenerated by datmo")
        elif latest_snapshot_auto_generated and not latest_snapshot_user_generated:
            self.cli_helper.echo("latest snapshot autogenerated by datmo: ")
            self.cli_helper.echo(latest_snapshot_auto_generated)
            self.cli_helper.echo("no snapshot generated by the user")
        elif not latest_snapshot_user_generated and not latest_snapshot_auto_generated:
            self.cli_helper.echo("no snapshots created yet")
        elif latest_snapshot_user_generated.created_at > latest_snapshot_auto_generated.created_at:
            # NOTE(review): damaged line, same pattern as above - restore from
            # the original source.
            self.cli_helper.echo("latest snapshot generated by the user: "******"latest snapshot autogenerated by datmo: ")
            self.cli_helper.echo(latest_snapshot_auto_generated)
        elif latest_snapshot_user_generated.created_at < latest_snapshot_auto_generated.created_at:
            self.cli_helper.echo("latest snapshot autogenerated by datmo: ")
            self.cli_helper.echo(latest_snapshot_auto_generated)
            # NOTE(review): damaged line. Everything after it reads like the
            # body of a separate cleanup command (confirmation prompt, then
            # project_controller.cleanup()) whose `def` line and the end of
            # status() are missing - confirm against the original file.
            self.cli_helper.echo("latest snapshot generated by the user: "******"prompt", "cli.project.cleanup.confirm"))

        # Cleanup datmo project if user specifies
        if response:
            self.cli_helper.echo(
                __(
                    "info", "cli.project.cleanup", {
                        "name": self.project_controller.model.name,
                        "path": self.project_controller.home
                    }))
            try:
                success = self.project_controller.cleanup()
                if success:
                    self.cli_helper.echo(
                        __(
                            "info", "cli.project.cleanup.success", {
                                "name": self.project_controller.model.name,
                                "path": self.project_controller.home
                            }))
                return success
            except Exception:
                self.cli_helper.echo(
                    __(
                        "info", "cli.project.cleanup.failure", {
                            "name": self.project_controller.model.name,
                            "path": self.project_controller.home
                        }))
        # Reached when the user declined or when cleanup raised
        return False
Ejemplo n.º 25
0
class TaskController(BaseController):
    """TaskController inherits from BaseController and manages business logic associated with tasks
    within the project.

    The constructor takes no arguments; it raises ProjectNotInitialized when
    the project has not been initialized.

    Attributes
    ----------
    environment : datmo.core.controller.environment.environment.EnvironmentController
        used to build, run and stop the environment a task executes in
    snapshot : datmo.core.controller.snapshot.SnapshotController
        used to create snapshots before and after tasks
    spinner : Spinner
        started and stopped around task creation

    Methods
    -------
    create()
        creates a Task object for the current model and session
    _run_helper(environment_id, options, log_filepath)
        helper for run to build the environment and run it with the appropriate parameters
    _parse_logs_for_results(logs)
        extracts "key:value" lines from the logs into a results dictionary
    run(task_id, snapshot_dict=None, task_dict=None)
        runs the task and tracks the run, logs, inputs and outputs
    list(session_id=None, sort_key=None, sort_order=None)
        lists all tasks within the project given filters
    get(task_id)
        returns the task with the given id
    get_files(task_id, mode="r")
        returns the file objects associated with the task
    delete(task_id)
        stops and deletes the specified task from the project
    stop(task_id=None, all=False, status="STOPPED")
        stops the run of one task (or all tasks) and updates the task status
    """

    def __init__(self):
        """Set up the sub-controllers and spinner, then check project state.

        Raises
        ------
        ProjectNotInitialized
            if the project has not been initialized
        """
        super(TaskController, self).__init__()
        self.environment = EnvironmentController()
        self.snapshot = SnapshotController()
        self.spinner = Spinner()

        if self.is_initialized:
            return
        raise ProjectNotInitialized(__("error", "controller.task.__init__"))

    def create(self):
        """Create a Task object for the current model and session

        The spinner is started before the task is written through the DAL and
        is always stopped afterwards, whether or not the creation succeeded.

        Returns
        -------
        Task
            object entity for Task (datmo.core.entity.task.Task)
        """
        task_params = {
            "model_id": self.model.id,
            "session_id": self.current_session.id
        }

        try:
            self.spinner.start()
            return self.dal.task.create(Task(task_params))
        finally:
            self.spinner.stop()

    def _run_helper(self, environment_id, options, log_filepath):
        """Run environment with parameters

        Parameters
        ----------
        environment_id : str
            the environment id for definition
        options : dict
            can include the following values:

            command : list
            ports : list
                Here are some example ports used for common applications.
                   *  'jupyter notebook' - 8888
                   *  flask API - 5000
                   *  tensorboard - 6006
                An example input for the above would be ["8888:8888", "5000:5000", "6006:6006"]
                which maps the running host port (right) to that of the environment (left)
            name : str
            volumes : dict
            mem_limit : str
            workspace : str
            detach : bool
            stdin_open : bool
            tty : bool
        log_filepath : str
            absolute filepath to the log file

        Returns
        -------
        return_code : int
            system return code of the environment that was run
        run_id : str
            id of the environment run (different from environment id)
        logs : str
            output logs from the run
        """
        # Run container with options provided
        run_options = {
            "command": options.get('command', None),
            "ports": options.get('ports', None),
            "name": options.get('name', None),
            "volumes": options.get('volumes', None),
            "mem_limit": options.get('mem_limit', None),
            "gpu": options.get('gpu', False),
            "detach": options.get('detach', False),
            "stdin_open": options.get('stdin_open', False),
            "tty": options.get('tty', False),
            "api": False,
        }
        workspace = options.get('workspace', None)
        self.environment.build(environment_id, workspace)
        # Run container with environment
        return_code, run_id, logs = self.environment.run(
            environment_id, run_options, log_filepath)

        return return_code, run_id, logs

    def _parse_logs_for_results(self, logs):
        """Parse log string to extract results and return dictionary.

        The format of the log line must be "key:value", whitespace will not matter
        and if there are more than 2 items found when split on ":", it will not
        log this as a key/value result

        Note
        ----
        If the same key is found multiple times in the logs, the last occurring
        one will be the one that is saved.

        Parameters
        ----------
        logs : str
            raw string value of output logs

        Returns
        -------
        dict or None
            dictionary to represent results from task
        """
        results = {}
        for line in logs.split("\n"):
            split_line = line.split(":")
            if len(split_line) == 2:
                results[split_line[0].strip()] = split_line[1].strip()
        if results == {}:
            results = None
        return results

    def run(self, task_id, snapshot_dict=None, task_dict=None):
        """Run a task with parameters. If dictionary specified, create a new task with new run parameters.
        Snapshot objects are created before and after the task to keep track of the state. During the run,
        the task directory is mounted at `/task/` and the project home at `/home/` inside the environment.
        Create config.json, stats.json and any weights or any file such as graphs and visualizations
        within the task directory for quick access

        Note
        ----
        `snapshot_dict` and `task_dict` are modified in place when given
        (`visible`, `command` and `command_list` may be set on them).

        An exception raised while building or running the environment does not
        propagate: it is recorded in the task's `logs`, the task is marked
        "FAILED" and the updated task is still returned.

        Parameters
        ----------
        task_id : str
            id for the task you would like to run
        snapshot_dict : dict
            set of parameters to create a snapshot (see SnapshotController for details.
            default is None, which means dictionary with `visible` False will be added to
            hide auto-generated snapshot) NOTE: `visible` False will always be False regardless
            of whether the user provides another value for `visible`.
        task_dict : dict
            set of parameters to characterize the task run
            (default is None, which translate to {}, see datmo.core.entity.task.Task for more details on inputs)

        Returns
        -------
        Task
            the Task object which completed its run with updated parameters

        Raises
        ------
        RequiredArgumentMissing
            If none of command, command_list or interactive is available
        TaskRunError
            If the task already has a status (it has been run before) or the
            task directory cannot be created
        TaskNoCommandGiven
            If no command can be determined for a non-interactive task
        """
        # Ensure visible=False is present in the snapshot dictionary
        if not snapshot_dict:
            snapshot_dict = {"visible": False}
        else:
            snapshot_dict['visible'] = False

        if not task_dict:
            task_dict = {}
        # Obtain Task to run
        task_obj = self.dal.task.get_by_id(task_id)

        # Ensure that at least 1 of command, command_list,  or interactive is present in task_dict
        important_task_args = ["command", "command_list", "interactive"]
        if not task_dict.get('command', task_obj.command) and \
            not task_dict.get('command_list', task_obj.command_list) and \
                not task_dict.get('interactive', task_obj.interactive):
            raise RequiredArgumentMissing(
                __("error", "controller.task.run.arg",
                   " or ".join(important_task_args)))

        # A task may only be run once: any existing status means it already ran
        if task_obj.status is None:
            task_obj.status = "RUNNING"
        else:
            raise TaskRunError(
                __("error", "cli.run.run.already_running", task_obj.id))
        # Create Task directory for user during run
        task_dirpath = os.path.join(".datmo", "tasks", task_obj.id)
        try:
            _ = self.file_driver.create(task_dirpath, directory=True)
        except Exception:
            raise TaskRunError(
                __("error", "controller.task.run", task_dirpath))
        # Create the before snapshot prior to execution
        before_snapshot_dict = snapshot_dict.copy()
        before_snapshot_dict[
            'message'] = "autogenerated snapshot created before task %s is run" % task_obj.id
        before_snapshot_obj = self.snapshot.create(before_snapshot_dict)
        # Update the task with pre-execution parameters, prefer list first then look for string command
        # List command will overwrite a string command if given
        if task_dict.get('command_list', task_obj.command_list):
            task_dict['command'] = " ".join(
                task_dict.get('command_list', task_obj.command_list))
        else:
            if task_dict.get('command', task_obj.command):
                task_dict['command_list'] = shlex.split(
                    task_dict.get('command', task_obj.command))
            elif not task_dict.get('interactive', task_obj.interactive):
                # If it's not interactive then there is not expected task
                raise TaskNoCommandGiven()

        validate("create_task", task_dict)
        task_obj = self.dal.task.update({
            "id":
                task_obj.id,
            "before_snapshot_id":
                task_dict.get('before_snapshot_id', before_snapshot_obj.id),
            "command":
                task_dict.get('command', task_obj.command),
            "command_list":
                task_dict.get('command_list', task_obj.command_list),
            "gpu":
                task_dict.get('gpu', False),
            "mem_limit":
                task_dict.get('mem_limit', None),
            "workspace":
                task_dict.get('workspace', None),
            "interactive":
                task_dict.get('interactive', task_obj.interactive),
            "detach":
                task_dict.get('detach', task_obj.detach),
            "ports":
                task_dict.get('ports', task_obj.ports),
            "task_dirpath":
                task_dict.get('task_dirpath', task_dirpath),
            "log_filepath":
                task_dict.get('log_filepath',
                              os.path.join(task_dirpath, "task.log")),
            "start_time":
                task_dict.get('start_time', datetime.utcnow()),
            "status":
                task_obj.status
        })

        # Copy over files from the before_snapshot file collection to task dir
        file_collection_obj =  \
            self.dal.file_collection.get_by_id(before_snapshot_obj.file_collection_id)
        self.file_driver.copytree(
            os.path.join(self.home, file_collection_obj.path),
            os.path.join(self.home, task_obj.task_dirpath))

        return_code, run_id, logs = 0, None, None
        # Set when the run raised; the error text stored in `logs` is then not
        # real task output and must not be parsed for results
        run_errored = False

        try:
            # Set the parameters set in the task
            if task_obj.detach and task_obj.interactive:
                raise TaskInteractiveDetachError(
                    __("error", "controller.task.run.args.detach.interactive"))

            environment_run_options = {
                "command": task_obj.command_list,
                "ports": [] if task_obj.ports is None else task_obj.ports,
                "name": "datmo-task-" + self.model.id + "-" + task_obj.id,
                "volumes": {
                    os.path.join(self.home, task_obj.task_dirpath): {
                        'bind': '/task/',
                        'mode': 'rw'
                    },
                    self.home: {
                        'bind': '/home/',
                        'mode': 'rw'
                    }
                },
                "mem_limit": task_obj.mem_limit,
                "workspace": task_obj.workspace,
                "gpu": task_obj.gpu,
                "detach": task_obj.detach,
                "stdin_open": task_obj.interactive,
                "tty": task_obj.interactive,
                "api": False
            }
            # Run environment via the helper function
            return_code, run_id, logs =  \
                self._run_helper(before_snapshot_obj.environment_id,
                                 environment_run_options,
                                 os.path.join(self.home, task_obj.log_filepath))

        except Exception as e:
            return_code = 1
            run_errored = True
            # `logs` is still None when the run itself raised, and exceptions
            # have no `.message` attribute on Python 3, so build the text from
            # str(e) and tolerate a missing log
            logs = (logs or "") + "Error running task: %s" % str(e)
        finally:
            # NOTE: the `return` at the end of this block discards any
            # exception still in flight, so the post-run bookkeeping below
            # always decides the outcome of this method.

            # Create the after snapshot after execution is completed with new paths
            after_snapshot_dict = snapshot_dict.copy()
            after_snapshot_dict[
                'message'] = "autogenerated snapshot created after task %s is run" % task_obj.id

            # Add in absolute paths from running task directory
            absolute_task_dir_path = os.path.join(self.home,
                                                  task_obj.task_dirpath)
            candidate_paths = [
                os.path.join(absolute_task_dir_path, item)
                for item in os.listdir(absolute_task_dir_path)
            ]
            absolute_paths = [
                path for path in candidate_paths
                if os.path.isfile(path) or os.path.isdir(path)
            ]
            after_snapshot_dict.update({
                "paths": absolute_paths,
                "environment_id": before_snapshot_obj.environment_id,
            })
            after_snapshot_obj = self.snapshot.create(after_snapshot_dict)

            # (optional) Remove temporary task directory path
            # Update the task with post-execution parameters
            end_time = datetime.utcnow()
            duration = (end_time - task_obj.start_time).total_seconds()
            update_task_dict = {
                "id": task_obj.id,
                "after_snapshot_id": after_snapshot_obj.id,
                "logs": logs,
                "status": "SUCCESS" if return_code == 0 else "FAILED",
                # "results": task_obj.results, # TODO: update during run
                "end_time": end_time,
                "duration": duration
            }
            if logs is not None and not run_errored:
                update_task_dict["results"] = self._parse_logs_for_results(
                    logs)
            if run_id is not None:
                update_task_dict["run_id"] = run_id
            return self.dal.task.update(update_task_dict)

    def list(self, session_id=None, sort_key=None, sort_order=None):
        query = {}
        if session_id:
            try:
                self.dal.session.get_by_id(session_id)
            except EntityNotFound:
                raise SessionDoesNotExist(
                    __("error", "controller.task.list", session_id))
            query['session_id'] = session_id
        return self.dal.task.query(query, sort_key, sort_order)

    def get(self, task_id):
        """Get task object and return

        Parameters
        ----------
        task_id : str
            id for the task you would like to get

        Returns
        -------
        datmo.core.entity.task.Task
            core task object

        Raises
        ------
        DoesNotExist
            task does not exist
        """
        try:
            return self.dal.task.get_by_id(task_id)
        except EntityNotFound:
            raise DoesNotExist()

    def get_files(self, task_id, mode="r"):
        """Get list of file objects for task id. It will look in the following areas in the following order

        1) look in the after snapshot for file collection
        2) look in the running task file collection
        3) look in the before snapshot for file collection

        Parameters
        ----------
        task_id : str
            id for the task you would like to get file objects for
        mode : str
            file open mode
            (default is "r" to open file for read)

        Returns
        -------
        list
            list of python file objects

        Raises
        ------
        DoesNotExist
            task object does not exist
        PathDoesNotExist
            no file objects exist for the task
        """
        try:
            task_obj = self.dal.task.get_by_id(task_id)
        except EntityNotFound:
            raise DoesNotExist()
        if task_obj.after_snapshot_id:
            # perform number 1) and return file list
            return self.snapshot.get_files(
                task_obj.after_snapshot_id, mode=mode)
        elif task_obj.task_dirpath:
            # perform number 2) and return file list
            return self.file_driver.get(
                task_obj.task_dirpath, mode=mode, directory=True)
        elif task_obj.before_snapshot_id:
            # perform number 3) and return file list
            return self.snapshot.get_files(
                task_obj.before_snapshot_id, mode=mode)
        else:
            # Error because the task does not have any files associated with it
            raise PathDoesNotExist()

    def delete(self, task_id):
        if not task_id:
            raise RequiredArgumentMissing(
                __("error", "controller.task.delete.arg", "id"))
        stopped_success = self.stop(task_id)
        delete_task_success = self.dal.task.delete(task_id)
        return stopped_success and delete_task_success

    def stop(self, task_id=None, all=False, status="STOPPED"):
        """Stop and remove run for the task and update task object statuses

        Parameters
        ----------
        task_id : str, optional
            id for the task you would like to stop
        all : bool, optional
            if specified, will stop all tasks within project

        Returns
        -------
        return_code : bool
            system return code of the stop

        Raises
        ------
        RequiredArgumentMissing
        TooManyArgumentsFound
        """
        if task_id is None and all is False:
            raise RequiredArgumentMissing(
                __("error", "controller.task.stop.arg.missing", "id"))
        if task_id and all:
            raise TooManyArgumentsFound()
        if task_id:
            try:
                task_obj = self.get(task_id)
            except DoesNotExist:
                time.sleep(1)
                task_obj = self.get(task_id)
            task_match_string = "datmo-task-" + self.model.id + "-" + task_id
            # Get the environment id associated with the task
            kwargs = {'match_string': task_match_string}
            # Get the environment from the task
            before_snapshot_id = task_obj.before_snapshot_id
            after_snapshot_id = task_obj.after_snapshot_id
            if not before_snapshot_id and not after_snapshot_id:
                # TODO: remove...for now database may not be in sync. no task that has run can have NO before_snapshot_id
                time.sleep(1)
                task_obj = self.get(task_id)
            if after_snapshot_id:
                after_snapshot_obj = self.snapshot.get(after_snapshot_id)
                kwargs['environment_id'] = after_snapshot_obj.environment_id
            if not after_snapshot_id and before_snapshot_id:
                before_snapshot_obj = self.snapshot.get(before_snapshot_id)
                kwargs['environment_id'] = before_snapshot_obj.environment_id
            return_code = self.environment.stop(**kwargs)
        if all:
            return_code = self.environment.stop(all=True)
        # Set stopped task statuses to STOPPED if return success
        if return_code:
            if task_id:
                self.dal.task.update({"id": task_id, "status": status})
            if all:
                task_objs = self.dal.task.query({})
                for task_obj in task_objs:
                    self.dal.task.update({"id": task_obj.id, "status": status})

        return return_code
Ejemplo n.º 26
0
class TestEnvironmentController():
    def setup_method(self):
        """Create a temporary project directory and the controllers under test."""
        # provide mountable tmp directory for docker
        on_windows = platform.system() == "Windows"
        tempfile.tempdir = None if on_windows else "/tmp"
        base_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=base_dir)
        self.project = ProjectController(self.temp_dir)
        self.environment = EnvironmentController(self.temp_dir)

    def teardown_method(self):
        """Do nothing after each test; no cleanup is performed here."""

    def test_create(self):
        """Exercise environment creation across definition/script combinations."""
        # 0) Test create when unsupported language given
        # 1) Test create when NO file exists and NO definition path exists
        # 2) Test create when NO file exists and definition path exists
        # 3) Test create when definition path exists and given
        # 4) Test create when file exists and definition path exists
        # 5) Test create when file exists but NO definition path exists
        # 6) Test create when definition path exists and given for NEW definition filepath

        self.project.init("test3", "test description")

        # Files every created environment is expected to have in its collection
        default_files = ("Dockerfile", "datmoDockerfile", "hardware_info")

        # 0) Test option 0
        # Initialise the flag so that a missing exception fails the assert
        # instead of raising UnboundLocalError
        failed = False
        try:
            self.environment.create({"language": "java"})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # 1) Test option 1
        failed = False
        try:
            self.environment.create({})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) Test option 2
        environment_obj_1 = self.environment.create({})
        self._assert_environment_created(environment_obj_1, default_files)

        # 3) Test option 3
        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj_2 = self.environment.create(input_dict)
        self._assert_environment_created(environment_obj_2, default_files)

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})
        self._assert_environment_created(environment_obj_3, default_files)

        # Remove definition filepath
        os.remove(definition_filepath)

        # 2), 3) and 4) all used the same definition
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # 5) Test option 5
        environment_obj_4 = self.environment.create({})
        self._assert_environment_created(
            environment_obj_4, ("requirements.txt", ) + default_files)

        assert environment_obj_1.id != environment_obj_4.id

        # 6) Test option 6

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM cloudgear/ubuntu:14.04")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create a new environment obj
        environment_obj_5 = self.environment.create(input_dict)
        self._assert_environment_created(environment_obj_5, default_files)

        assert environment_obj_5.id != environment_obj_1.id
        assert environment_obj_5.id != environment_obj_4.id

    def _assert_environment_created(self, environment_obj, expected_filenames):
        """Assert the environment's fields are set and its file collection holds the expected files."""
        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj
        assert environment_obj.id
        assert environment_obj.driver_type == "docker"
        assert environment_obj.file_collection_id
        assert environment_obj.definition_filename
        assert environment_obj.hardware_info
        assert environment_obj.unique_hash == file_collection_obj.filehash
        for filename in expected_filenames:
            assert os.path.isfile(os.path.join(file_collection_dir, filename))

    def test_build(self):
        """Build environments created from a Dockerfile, a script, or both."""
        # 1) Test build when no environment given
        # 2) Test build when definition path exists and given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        self.project.init("test5", "test description")

        # 1) Test option 1
        try:
            _ = self.environment.build("does_not_exist")
        except EntityNotFound:
            failed = True
        else:
            failed = False
        assert failed

        # Create environment definition
        dockerfile_path = os.path.join(self.environment.home, "Dockerfile")
        with open(dockerfile_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) Test option 2
        # Create environment in the project
        env_from_path = self.environment.create({
            "definition_filepath": dockerfile_path,
        })
        assert self.environment.build(env_from_path.id)

        # 3) Test option 3
        # Create environment in the project
        env_from_default = self.environment.create({})
        assert self.environment.build(env_from_default.id)

        # Create script to test
        script_path = os.path.join(self.environment.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "w") as f:
            for script_line in script_lines:
                f.write(to_unicode(script_line))

        # 4) Test option 4
        env_with_script = self.environment.create({})
        assert self.environment.build(env_with_script.id)

        # test 2), 3), and 4) will result in the same environment
        assert env_from_path.id == env_from_default.id
        assert env_from_default.id == env_with_script.id

        # Remove the Dockerfile so the next create cannot use it
        os.remove(dockerfile_path)

        # 5) Test option 5
        env_without_definition = self.environment.create({})
        assert self.environment.build(env_without_definition.id)
        assert env_without_definition.id != env_from_path.id

        # teardown
        self.environment.delete(env_from_path.id)
        self.environment.delete(env_without_definition.id)

    def test_run(self):
        """Run a command in a Dockerfile environment, then a script in a generated one."""
        # 1) Test run simple command with simple Dockerfile
        # 2) Test run script, with autogenerated definition
        self.project.init("test5", "test description")

        # Options shared by both runs; command and volumes vary per scenario
        base_options = {
            "command": None,
            "ports": ["8888:8888"],
            "name": None,
            "volumes": None,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "gpu": False,
            "api": False
        }

        # 1) Test option 1

        # Create environment definition
        dockerfile_path = os.path.join(self.environment.home, "Dockerfile")
        with open(dockerfile_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        echo_options = dict(base_options, command=["sh", "-c", "echo yo"])

        # Create environment in the project
        environment_obj = self.environment.create({
            "definition_filepath": dockerfile_path,
        })

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build environment in the project
        _ = self.environment.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = self.environment.run(
            environment_obj.id, echo_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

        # 2) Test option 2
        os.remove(dockerfile_path)

        # Create script to test
        script_path = os.path.join(self.environment.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "w") as f:
            for script_line in script_lines:
                f.write(to_unicode(script_line))

        # Create environment in the project
        environment_obj = self.environment.create({})
        self.environment.build(environment_obj.id)

        script_options = dict(
            base_options,
            command=["python", "script.py"],
            volumes={
                self.environment.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            })

        # Run environment in the project
        return_code, run_id, logs = self.environment.run(
            environment_obj.id, script_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

    def test_list(self):
        """Create two environments from separate definition files and check
        that listing returns exactly those two."""
        self.project.init("test4", "test description")

        # (definition file name, definition contents), in creation order
        definitions = [
            ("Dockerfile", "FROM datmo/xgboost:cpu"),
            ("Dockerfile2", "FROM datmo/scikit-opencv"),
        ]

        created = []
        for filename, contents in definitions:
            # Write the definition, then register an environment for it
            definition_path = os.path.join(self.environment.home, filename)
            with open(definition_path, "w") as handle:
                handle.write(to_unicode(str(contents)))
            created.append(
                self.environment.create({
                    "definition_filepath": definition_path
                }))

        # List all environments and ensure both exist
        listed = self.environment.list()

        assert len(listed) == 2
        for environment_obj in created:
            assert environment_obj in listed

    def test_delete(self):
        """Delete a freshly created environment and verify that it can no
        longer be retrieved by id."""
        self.project.init("test5", "test description")

        # Write the environment definition
        dockerfile_path = os.path.join(self.environment.home, "Dockerfile")
        with open(dockerfile_path, "w") as handle:
            handle.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # Create the environment, then delete it again
        environment_obj = self.environment.create({
            "definition_filepath": dockerfile_path,
        })
        result = self.environment.delete(environment_obj.id)

        # Retrieval of the deleted environment must raise EntityNotFound
        try:
            self.environment.dal.environment.get_by_id(environment_obj.id)
        except EntityNotFound:
            thrown = True
        else:
            thrown = False

        assert result == True
        assert thrown == True

    def test_stop(self):
        """Build and run an environment, then stop the run by its id.

        The environment is deleted in a ``finally`` clause so that a failing
        build, run, stop or assertion does not leave it behind.
        """
        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": None,
            "volumes": None,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "gpu": False,
            "api": False
        }

        # Create environment_driver definition
        # NOTE(review): this writes the same contents under the project home;
        # it may be the same file as definition_filepath - confirm.
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        log_filepath = os.path.join(self.project.home, "task.log")

        try:
            # Build environment in the project
            self.environment.build(environment_obj.id)

            # Run environment in the project
            _, run_id, _ = \
                self.environment.run(environment_obj.id, run_options,
                                     log_filepath)

            # Stop the running environment
            return_code = self.environment.stop(run_id)
            assert return_code
        finally:
            # teardown, executed even when the checks above fail
            self.environment.delete(environment_obj.id)
Ejemplo n.º 27
0
class TestSnapshotController():
    def setup_method(self):
        """Create a temporary directory under ``test_datmo_dir``, set it as
        the config home, and reset the list of environment ids to clean up."""
        scratch_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.temp_dir = scratch_dir
        Config().set_home(scratch_dir)
        # Filled by tests with ids that teardown_method should delete
        self.environment_ids = list()

    def teardown_method(self):
        """Delete every environment recorded in ``self.environment_ids``.

        Nothing is done when ``check_docker_inactive`` reports true for the
        test directory.

        Raises:
            Exception: if the environment controller reports that an
                environment could not be deleted. The message names the id.
        """
        if check_docker_inactive(test_datmo_dir,
                                 Config().datmo_directory_name):
            return
        self.__setup()
        self.environment_controller = EnvironmentController()
        # De-duplicate: the same environment id may have been recorded twice
        for env_id in set(self.environment_ids):
            if not self.environment_controller.delete(env_id):
                raise Exception(
                    "Failed to delete environment %s during teardown" %
                    env_id)

    def __setup(self):
        """Initialize a project in the temp directory and create the project,
        task and snapshot controllers used by the tests."""
        home = self.temp_dir
        Config().set_home(home)
        # The project is initialized before the dependent controllers are built
        self.project_controller = ProjectController()
        self.project_controller.init("test", "test description")
        self.task_controller = TaskController()
        self.snapshot_controller = SnapshotController()

    def test_init_fail_project_not_init(self):
        """Constructing a SnapshotController without an initialized project
        must raise ProjectNotInitialized."""
        Config().set_home(self.temp_dir)
        try:
            SnapshotController()
        except ProjectNotInitialized:
            failed = True
        else:
            failed = False
        assert failed

    def test_init_fail_invalid_path(self):
        """Constructing a SnapshotController with the home set to
        "some_random_dir" must raise InvalidProjectPath."""
        Config().set_home("some_random_dir")
        try:
            SnapshotController()
        except InvalidProjectPath:
            failed = True
        else:
            failed = False
        assert failed

    def test_current_snapshot(self):
        """current_snapshot raises UnstagedChanges before any snapshot exists
        and returns the created snapshot afterwards."""
        self.__setup()

        # Failure case: unstaged changes
        try:
            self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            failed = True
        else:
            failed = False
        assert failed

        # Success case: a snapshot has been created
        created_snapshot = self.__default_create()
        reported_snapshot = self.snapshot_controller.current_snapshot()
        assert reported_snapshot == created_snapshot

    def test_create_fail_no_message(self):
        """Creating a snapshot from an empty dictionary (no message) must
        raise RequiredArgumentMissing."""
        self.__setup()
        try:
            self.snapshot_controller.create({})
        except RequiredArgumentMissing:
            failed = True
        else:
            failed = False
        assert failed

    def test_create_success_no_code(self):
        """Creating a snapshot with only a message returns a truthy result."""
        self.__setup()
        # Only the message is supplied; everything else uses default values
        snapshot_input = {"message": "my test snapshot"}
        result = self.snapshot_controller.create(snapshot_input)
        assert result

    def test_create_success_no_code_environment(self):
        """Snapshot creation succeeds when only a Dockerfile exists in the
        environment directory."""
        self.__setup()

        # Write the environment definition into the environment directory
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Creation must succeed with just the definition file present
        snapshot_input = {"message": "my test snapshot"}
        result = self.snapshot_controller.create(snapshot_input)

        assert result

    def test_create_success_no_code_environment_files(self):
        """Snapshot creation succeeds with a Dockerfile in the environment
        directory and a file in the files directory."""
        self.__setup()

        # Write the environment definition into the environment directory
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Add a file to the files directory
        files_dir = self.project_controller.file_driver.files_directory
        text_path = os.path.join(files_dir, "test.txt")
        with open(text_path, "wb") as handle:
            handle.write(to_bytes(str("hello")))

        # Creation must succeed with both files present
        snapshot_input = {"message": "my test snapshot"}
        result = self.snapshot_controller.create(snapshot_input)

        assert result

    def test_create_no_environment_detected_in_file(self):
        """A snapshot created after adding a single file via the file driver
        has code, environment and file collection ids and empty config and
        stats."""
        self.__setup()

        # Add one file through the file driver, then snapshot with defaults
        self.snapshot_controller.file_driver.create("filepath1")
        snapshot = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        assert isinstance(snapshot, Snapshot)
        for id_attribute in ("code_id", "environment_id",
                             "file_collection_id"):
            assert getattr(snapshot, id_attribute)
        assert snapshot.config == {}
        assert snapshot.stats == {}

    def test_create_success_default_detected_in_file(self):
        """A snapshot created with only a script in the project home (no
        environment definition written by the test) has all ids set and empty
        config and stats."""
        self.__setup()

        # Write a small script into the project home
        script_path = os.path.join(self.snapshot_controller.home, "script.py")
        script_lines = ("import os\n", "import sys\n", "print('hello')\n")
        with open(script_path, "wb") as handle:
            for line in script_lines:
                handle.write(to_bytes(line))

        snapshot = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        assert isinstance(snapshot, Snapshot)
        for id_attribute in ("code_id", "environment_id",
                             "file_collection_id"):
            assert getattr(snapshot, id_attribute)
        assert snapshot.config == {}
        assert snapshot.stats == {}

    def test_create_success_default_env_def(self):
        """A snapshot created with a Dockerfile in the environment directory
        and a script in the project home has all ids set and empty config and
        stats."""
        self.__setup()

        # Write the environment definition
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Write a script into the project home
        script_path = os.path.join(self.snapshot_controller.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "wb") as handle:
            for line in script_lines:
                handle.write(to_bytes(line))

        # Create the snapshot with default values
        snapshot = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        assert isinstance(snapshot, Snapshot)
        for id_attribute in ("code_id", "environment_id",
                             "file_collection_id"):
            assert getattr(snapshot, id_attribute)
        assert snapshot.config == {}
        assert snapshot.stats == {}

    def test_create_success_with_environment(self):
        """With an environment definition and a script present, a snapshot
        created from a message alone has all ids set and empty config and
        stats."""
        self.__setup()

        # (path, lines) for each file to write, in order: the environment
        # definition first, then the script in the project home
        files_to_write = [
            (os.path.join(
                self.project_controller.environment_driver.
                environment_directory_path, "Dockerfile"),
             ["FROM python:3.5-alpine"]),
            (os.path.join(self.snapshot_controller.home, "script.py"),
             ["import numpy\n", "import sklearn\n", "print('hello')\n"]),
        ]
        for path, lines in files_to_write:
            with open(path, "wb") as handle:
                for line in lines:
                    handle.write(to_bytes(line))

        # Create the snapshot with default values
        snapshot = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        assert isinstance(snapshot, Snapshot)
        assert snapshot.code_id
        assert snapshot.environment_id
        assert snapshot.file_collection_id
        assert snapshot.config == {}
        assert snapshot.stats == {}

    def test_create_success_env_paths(self):
        """A snapshot can be created from an explicit ``environment_paths``
        entry of the form ``<source path>>Dockerfile``."""
        self.__setup()

        # Write a definition file under a new sub-directory of the home
        custom_dir = os.path.join(self.snapshot_controller.home, "random_dir")
        os.makedirs(custom_dir)
        custom_definition = os.path.join(custom_dir, "randomDockerfile")
        with open(custom_definition, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Create the snapshot, passing the definition via environment_paths
        snapshot_input = {
            "message": "my test snapshot",
            "environment_paths": [custom_definition + ">Dockerfile"],
        }
        snapshot = self.snapshot_controller.create(snapshot_input)

        assert isinstance(snapshot, Snapshot)
        for id_attribute in ("code_id", "environment_id",
                             "file_collection_id"):
            assert getattr(snapshot, id_attribute)
        assert snapshot.config == {}
        assert snapshot.stats == {}

    def test_create_success_default_env_def_duplicate(self):
        """Creating two snapshots with the same parameters yields matching
        id, code, environment, file collection, config and stats."""
        self.__setup()

        # Write the environment definition
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Write a script into the project home
        script_path = os.path.join(self.snapshot_controller.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "wb") as handle:
            for line in script_lines:
                handle.write(to_bytes(line))

        # Create the same snapshot twice
        first = self.snapshot_controller.create({
            "message": "my test snapshot"
        })
        second = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        # The second call should return the same object back
        assert second.id == first.id
        assert second.code_id == first.code_id
        assert second.environment_id == first.environment_id
        assert second.file_collection_id == first.file_collection_id
        assert second.config == first.config
        assert second.stats == first.stats

    def test_create_success_given_files_env_def_config_file_stats_file(self):
        """A second snapshot created after adding files and passing config and
        stats file paths differs from the first in code and file collection,
        shares its environment, and carries the file contents as config and
        stats."""
        self.__setup()

        # Write the environment definition
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Write a script into the project home
        script_path = os.path.join(self.snapshot_controller.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "wb") as handle:
            for line in script_lines:
                handle.write(to_bytes(line))

        # Baseline snapshot
        baseline = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        # Add two directories and a file under the files directory
        _, files_directory_name = os.path.split(
            self.project_controller.file_driver.files_directory)
        files_relative_path = os.path.join(
            self.project_controller.file_driver.datmo_directory_name,
            files_directory_name)
        for directory_name in ("dirpath1", "dirpath2"):
            self.snapshot_controller.file_driver.create(
                os.path.join(files_relative_path, directory_name),
                directory=True)
        self.snapshot_controller.file_driver.create(
            os.path.join(files_relative_path, "filepath1"))

        # Write the config file
        config_filepath = os.path.join(self.snapshot_controller.home,
                                       "config.json")
        with open(config_filepath, "wb") as handle:
            handle.write(to_bytes(str('{"foo":"bar"}')))

        # Write the stats file
        stats_filepath = os.path.join(self.snapshot_controller.home,
                                      "stats.json")
        with open(stats_filepath, "wb") as handle:
            handle.write(to_bytes(str('{"foo":"bar"}')))

        # Create a snapshot that points at the config and stats files
        updated = self.snapshot_controller.create({
            "message": "my test snapshot",
            "config_filepath": config_filepath,
            "stats_filepath": stats_filepath,
        })

        assert updated != baseline
        assert updated.code_id != baseline.code_id
        assert updated.environment_id == baseline.environment_id
        assert updated.file_collection_id != baseline.file_collection_id
        assert updated.config == {"foo": "bar"}
        assert updated.stats == {"foo": "bar"}

    def test_create_success_given_files_env_def_different_config_stats(self):
        """Passing ``config_filename`` / ``stats_filename`` values of
        "different_name" gives a new snapshot with empty config and stats,
        even though config.json and stats.json exist in the project home."""
        self.__setup()

        # Write the environment definition
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Write a script into the project home
        script_path = os.path.join(self.snapshot_controller.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "wb") as handle:
            for line in script_lines:
                handle.write(to_bytes(line))

        # Baseline snapshot
        baseline = self.snapshot_controller.create({
            "message": "my test snapshot"
        })

        # Add two directories and a file under the files directory
        _, files_directory_name = os.path.split(
            self.project_controller.file_driver.files_directory)
        files_relative_path = os.path.join(
            self.project_controller.file_driver.datmo_directory_name,
            files_directory_name)
        for directory_name in ("dirpath1", "dirpath2"):
            self.snapshot_controller.file_driver.create(
                os.path.join(files_relative_path, directory_name),
                directory=True)
        self.snapshot_controller.file_driver.create(
            os.path.join(files_relative_path, "filepath1"))

        # Write the config file
        config_filepath = os.path.join(self.snapshot_controller.home,
                                       "config.json")
        with open(config_filepath, "wb") as handle:
            handle.write(to_bytes(str('{"foo":"bar"}')))

        # Write the stats file
        stats_filepath = os.path.join(self.snapshot_controller.home,
                                      "stats.json")
        with open(stats_filepath, "wb") as handle:
            handle.write(to_bytes(str('{"foo":"bar"}')))

        # Create a snapshot whose config/stats names do not match the files
        renamed = self.snapshot_controller.create({
            "message": "my test snapshot",
            "config_filename": "different_name",
            "stats_filename": "different_name",
        })

        assert renamed != baseline
        assert renamed.config == {}
        assert renamed.stats == {}

    def test_create_success_given_files_env_def_direct_config_stats(self):
        """Config and stats dictionaries passed directly to ``create`` are
        stored on the resulting snapshot."""
        self.__setup()

        # Write the environment definition
        environment_dir = self.project_controller.environment_driver.\
            environment_directory_path
        dockerfile_path = os.path.join(environment_dir, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Add two directories and a file under the files directory
        _, files_directory_name = os.path.split(
            self.project_controller.file_driver.files_directory)
        files_relative_path = os.path.join(
            self.project_controller.file_driver.datmo_directory_name,
            files_directory_name)
        for directory_name in ("dirpath1", "dirpath2"):
            self.snapshot_controller.file_driver.create(
                os.path.join(files_relative_path, directory_name),
                directory=True)
        self.snapshot_controller.file_driver.create(
            os.path.join(files_relative_path, "filepath1"))

        # Write a script into the project home
        script_path = os.path.join(self.snapshot_controller.home, "script.py")
        script_lines = ("import numpy\n", "import sklearn\n",
                        "print('hello')\n")
        with open(script_path, "wb") as handle:
            for line in script_lines:
                handle.write(to_bytes(line))

        # Create a snapshot with config and stats given inline
        snapshot = self.snapshot_controller.create({
            "message": "my test snapshot",
            "config": {
                "foo": "bar"
            },
            "stats": {
                "foo": "bar"
            },
        })

        assert snapshot.config == {"foo": "bar"}
        assert snapshot.stats == {"foo": "bar"}

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_create_from_task(self):
        """Exercise ``SnapshotController.create_from_task``.

        Scenarios covered, in order:

        0) fails with TaskNotComplete for a task that has not been run
        1) succeeds for a completed task with a message
        2) succeeds for a second completed task with a message
        3) succeeds with message, label, config and stats
        4) stats combine the ones given via ``snapshot_dict`` with the
           results of the task the snapshot is created from

        Every environment created by a task run is recorded in
        ``self.environment_ids`` so that teardown can delete it.
        """
        self.__setup()

        # Create task in the project
        task_obj = self.task_controller.create()

        # 0) Test option 0
        failed = False
        try:
            _ = self.snapshot_controller.create_from_task(
                message="my test snapshot", task_id=task_obj.id)
        except TaskNotComplete:
            failed = True
        assert failed

        # 1) Test option 1

        # Create task_dict
        task_command = ["sh", "-c", "echo test"]
        task_dict = {"command_list": task_command}

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        updated_task_obj = self.task_controller.run(task_obj.id,
                                                    task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        # Track the environment for cleanup in teardown_method
        self.environment_ids.append(environment_obj.id)

        snapshot_obj = self.snapshot_controller.create_from_task(
            message="my test snapshot", task_id=updated_task_obj.id)

        assert isinstance(snapshot_obj, Snapshot)
        assert snapshot_obj.id == updated_task_obj.after_snapshot_id
        assert snapshot_obj.message == "my test snapshot"
        assert snapshot_obj.stats == updated_task_obj.results
        assert snapshot_obj.visible == True

        # Create new task and corresponding dict
        task_obj = self.task_controller.create()
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # Test the default values
        updated_task_obj = self.task_controller.run(task_obj.id,
                                                    task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        # Track the environment for cleanup in teardown_method
        self.environment_ids.append(environment_obj.id)

        # 2) Test option 2
        snapshot_obj = self.snapshot_controller.create_from_task(
            message="my test snapshot", task_id=updated_task_obj.id)

        assert isinstance(snapshot_obj, Snapshot)
        assert snapshot_obj.id == updated_task_obj.after_snapshot_id
        assert snapshot_obj.message == "my test snapshot"
        assert snapshot_obj.stats == updated_task_obj.results
        assert snapshot_obj.visible == True

        # 3) Test option 3
        test_config = {"algo": "regression"}
        test_stats = {"accuracy": 0.9}
        snapshot_obj = self.snapshot_controller.create_from_task(
            message="my test snapshot",
            task_id=updated_task_obj.id,
            label="best",
            config=test_config,
            stats=test_stats)

        assert isinstance(snapshot_obj, Snapshot)
        assert snapshot_obj.id == updated_task_obj.after_snapshot_id
        assert snapshot_obj.message == "my test snapshot"
        assert snapshot_obj.label == "best"
        assert snapshot_obj.config == test_config
        assert snapshot_obj.stats == test_stats
        assert snapshot_obj.visible == True

        # 4) Test option 4
        test_config = {"algo": "regression"}
        test_stats = {"new_key": 0.9}
        task_obj_2 = self.task_controller.create()
        updated_task_obj_2 = self.task_controller.run(task_obj_2.id,
                                                      task_dict=task_dict,
                                                      snapshot_dict={
                                                          "config":
                                                          test_config,
                                                          "stats": test_stats
                                                      })
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_2.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        # Track the environment for cleanup in teardown_method
        self.environment_ids.append(environment_obj.id)

        snapshot_obj = self.snapshot_controller.create_from_task(
            message="my test snapshot",
            task_id=updated_task_obj_2.id,
            label="best")
        # Expected stats: the stats given at run time, updated with the
        # results of the task this snapshot was created from (task 2, not the
        # earlier task)
        updated_stats_dict = {}
        updated_stats_dict.update(test_stats)
        updated_stats_dict.update(updated_task_obj_2.results)

        assert isinstance(snapshot_obj, Snapshot)
        assert snapshot_obj.id == updated_task_obj_2.after_snapshot_id
        assert snapshot_obj.message == "my test snapshot"
        assert snapshot_obj.label == "best"
        assert snapshot_obj.stats == updated_stats_dict
        assert snapshot_obj.visible == True

    def __default_create(self):
        """Populate the project with files, an environment definition and
        empty config/stats files, then create and return a snapshot."""
        file_driver = self.snapshot_controller.file_driver
        home = self.snapshot_controller.home

        # Add two directories and a file under the files directory
        _, files_directory_name = os.path.split(
            self.project_controller.file_driver.files_directory)
        files_relative_path = os.path.join(
            self.project_controller.file_driver.datmo_directory_name,
            files_directory_name)
        for directory_name in ("dirpath1", "dirpath2"):
            file_driver.create(
                os.path.join(files_relative_path, directory_name),
                directory=True)
        file_driver.create(os.path.join(files_relative_path, "filepath1"))

        # Add a file at the top level and give it some content
        file_driver.create("filepath2")
        with open(os.path.join(home, "filepath2"), "wb") as handle:
            handle.write(to_bytes(str("import sys\n")))

        # Write the environment definition
        dockerfile_path = os.path.join(
            self.project_controller.environment_driver.
            environment_directory_path, "Dockerfile")
        with open(dockerfile_path, "wb") as handle:
            handle.write(to_bytes("FROM python:3.5-alpine"))

        # Write an empty JSON config file
        config_filepath = os.path.join(home, "config.json")
        with open(config_filepath, "wb") as handle:
            handle.write(to_bytes(str("{}")))

        # Write an empty JSON stats file
        stats_filepath = os.path.join(home, "stats.json")
        with open(stats_filepath, "wb") as handle:
            handle.write(to_bytes(str("{}")))

        # Create snapshot in the project
        return self.snapshot_controller.create({
            "message": "my test snapshot",
            "config_filename": config_filepath,
            "stats_filename": stats_filepath,
        })

    def test_check_unstaged_changes(self):
        """check_unstaged_changes raises UnstagedChanges before a snapshot is
        created and returns False afterwards."""
        self.__setup()

        # Before any snapshot: expect UnstagedChanges
        try:
            self.snapshot_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        else:
            failed = False
        assert failed

        # After the default snapshot: no unstaged changes
        self.__default_create()
        has_unstaged = self.snapshot_controller.check_unstaged_changes()
        assert has_unstaged == False

    def test_checkout(self):
        """Create two distinct snapshots and check out the first one by id."""
        self.__setup()

        # First snapshot
        first_snapshot = self.__default_create()

        # Add another file so the second snapshot differs from the first
        self.snapshot_controller.file_driver.create("test")
        second_snapshot = self.__default_create()

        assert second_snapshot != first_snapshot

        # Checkout to the first snapshot using its id
        # TODO: Check for which snapshot we are on
        checked_out = self.snapshot_controller.checkout(first_snapshot.id)

        assert checked_out == True

    def test_list(self):
        """Exercise snapshot listing: unsorted, sorted ascending/descending,
        invalid sort arguments, and the ``visible`` filter."""
        self.__setup()
        home = self.snapshot_controller.home

        # Create a file, then the first snapshot
        with open(os.path.join(home, "test.txt"), "wb") as handle:
            handle.write(to_bytes(str("test")))
        snapshot_obj_1 = self.__default_create()

        # Create another file, then the second snapshot
        with open(os.path.join(home, "test2.txt"), "wb") as handle:
            handle.write(to_bytes(str("test2")))
        snapshot_obj_2 = self.__default_create()

        def assert_both_present(listing):
            # Exactly the two snapshots created above must be listed
            assert len(listing) == 2
            assert snapshot_obj_1 in listing
            assert snapshot_obj_2 in listing

        # List all snapshots and ensure they exist
        assert_both_present(self.snapshot_controller.list())

        # List all snapshots in ascending order of creation time
        ascending = self.snapshot_controller.list(sort_key='created_at',
                                                  sort_order='ascending')
        assert_both_present(ascending)
        assert ascending[0].created_at <= ascending[-1].created_at

        # List all snapshots in descending order of creation time
        descending = self.snapshot_controller.list(sort_key='created_at',
                                                   sort_order='descending')
        assert_both_present(descending)
        assert descending[0].created_at >= descending[-1].created_at

        # Wrong order being passed in
        try:
            _ = self.snapshot_controller.list(sort_key='created_at',
                                              sort_order='wrong_order')
        except InvalidArgumentType:
            failed = True
        else:
            failed = False
        assert failed

        # Wrong key and order being passed in
        try:
            _ = self.snapshot_controller.list(sort_key='wrong_key',
                                              sort_order='wrong_order')
        except InvalidArgumentType:
            failed = True
        else:
            failed = False
        assert failed

        # Wrong key and right order being passed in: same ids as a valid sort
        expected_result = self.snapshot_controller.list(sort_key='created_at',
                                                        sort_order='ascending')
        wrong_key_result = self.snapshot_controller.list(
            sort_key='wrong_key', sort_order='ascending')
        expected_ids = set(item.id for item in expected_result)
        actual_ids = set(item.id for item in wrong_key_result)
        assert expected_ids == actual_ids

        # List snapshots with visible filter
        hidden = self.snapshot_controller.list(visible=False)
        assert len(hidden) == 0

        assert_both_present(self.snapshot_controller.list(visible=True))

    def test_update(self):
        """Update different subsets of snapshot fields and read them back."""
        self.__setup()
        config_value = {"config_foo": "bar"}
        stats_value = {"stats_foo": "bar"}
        message_value = 'test_message'
        label_value = 'test_label'

        # Each entry is the set of keyword arguments for one update() call
        update_cases = (
            # all of config, stats, message and label
            {
                "config": config_value,
                "stats": stats_value,
                "message": message_value,
                "label": label_value
            },
            # config and stats only
            {
                "config": config_value,
                "stats": stats_value
            },
            # message and label only
            {
                "message": message_value,
                "label": label_value
            },
            # message only
            {
                "message": message_value
            },
            # label only
            {
                "label": label_value
            },
        )

        for fields in update_cases:
            # Every case works on its own freshly created snapshot
            snapshot_obj = self.__default_create()
            self.snapshot_controller.update(snapshot_obj.id, **fields)

            # Re-read the snapshot from the dal and compare the updated fields
            stored = self.snapshot_controller.dal.snapshot.get_by_id(
                snapshot_obj.id)
            for field_name, expected in fields.items():
                assert getattr(stored, field_name) == expected

    def test_get(self):
        """``get`` raises DoesNotExist for an unknown id and returns a created snapshot."""
        self.__setup()

        # An id that was never created must raise DoesNotExist
        try:
            self.snapshot_controller.get("random")
        except DoesNotExist:
            lookup_failed = True
        else:
            lookup_failed = False
        assert lookup_failed

        # A created snapshot is returned unchanged when fetched by id
        created = self.__default_create()
        fetched = self.snapshot_controller.get(created.id)
        assert created == fetched

    def test_get_files(self):
        """``get_files`` raises for an unknown id and returns open files otherwise.

        The success case is checked for the default mode (expected to be
        ``"r"``) and for ``mode="a"``. The file names are recomputed from each
        result, so the second membership check really inspects the files
        returned for ``mode="a"`` instead of reusing the first result's names.
        """
        self.__setup()

        def check_files(files, expected_mode, expected_path):
            # Verify one get_files() result, then close the returned handles
            # so the test does not leak open files
            try:
                assert len(files) == 1
                for item in files:
                    assert isinstance(item, TextIOWrapper)
                    assert item.mode == expected_mode
                assert expected_path in [item.name for item in files]
            finally:
                for item in files:
                    item.close()

        # Test failure case
        failed = False
        try:
            self.snapshot_controller.get_files("random")
        except DoesNotExist:
            failed = True
        assert failed

        # Test success case
        snapshot_obj = self.__default_create()
        result = self.snapshot_controller.get_files(snapshot_obj.id)
        file_collection_obj = self.task_controller.dal.file_collection.get_by_id(
            snapshot_obj.file_collection_id)

        # Location where the snapshot's "filepath1" is expected inside the
        # collection directory for this file collection's hash
        expected_path = os.path.join(self.task_controller.home, ".datmo",
                                     "collections",
                                     file_collection_obj.filehash, "filepath1")

        check_files(result, "r", expected_path)

        # Same snapshot, files opened in append mode
        result = self.snapshot_controller.get_files(snapshot_obj.id, mode="a")
        check_files(result, "a", expected_path)

    def test_delete(self):
        """Delete a snapshot and confirm the dal can no longer find it."""
        self.__setup()

        # Snapshot to be removed
        snapshot_obj = self.__default_create()

        # Remove it through the controller
        delete_result = self.snapshot_controller.delete(snapshot_obj.id)

        # Looking the id up afterwards must raise EntityNotFound
        try:
            self.snapshot_controller.dal.snapshot.get_by_id(snapshot_obj.id)
        except EntityNotFound:
            lookup_raised = True
        else:
            lookup_raised = False

        assert delete_result == True
        assert lookup_raised == True
Ejemplo n.º 28
0
class ProjectController(BaseController):
    """ProjectController inherits from BaseController and manages business logic related to the
    project. One model is associated with each project currently.

    Methods
    -------
    init(name, description)
        Initialize the project repository as a new model or update the existing project
    cleanup()
        Remove all datmo references from the current repository. NOTE: THIS WILL DELETE ALL DATMO WORK
    status()
        Give the user a picture of the status of the project, snapshots, and tasks
    """

    def __init__(self):
        super(ProjectController, self).__init__()

    def init(self, name, description):
        """ Initialize the project

        This function will initialize the project or reinitialize it if the
        project is already initialized.

        Parameters
        ----------
        name : str
        description : str

        Returns
        -------
        bool
            True when initialization completed

        Raises
        ------
        Exception
            any error raised during initialization is re-raised after the
            rollback below (cleanup for a new model, restoring the previous
            name and description for an existing one)
        """
        is_new_model = False
        # Keep a handle on the existing model so it can be restored on failure
        old_model = self.model
        if not self.model:
            is_new_model = True

        try:
            # Always validate inputs to the init function
            validate("create_project", {
                "name": name,
                "description": description
            })

            # Initialize File Driver if needed
            if not self.file_driver.is_initialized:
                self.file_driver.init()

            # Initialize the dal
            if not self.dal.is_initialized:
                self.dal.init()

            # Initialize Code Driver if needed
            if not self.code_driver.is_initialized:
                self.code_driver.init()

            # Initialize Environment Driver if needed
            if not self.environment_driver.is_initialized:
                self.environment_driver.init()

            # Initialize the config JSON store
            self.config_store = JSONStore(
                os.path.join(self.home,
                             Config().datmo_directory_name, ".config"))

            # Create model if new else update
            if is_new_model:
                _ = self.dal.model.create(
                    Model({
                        "name": name,
                        "description": description
                    }))
            else:
                self._model = self.dal.model.update({
                    "id": self.model.id,
                    "name": name,
                    "description": description
                })

            # Connect Environment Driver if needed
            # (not required but will warn if not present)
            try:
                if not self.environment_driver.is_connected:
                    self.environment_driver.connect()
            except EnvironmentConnectFailed:
                self.logger.warning(
                    __("warn", "controller.general.environment.failed"))

            # Build the initial default Environment (NOT NECESSARY)
            # self.environment_driver.build_image(tag="datmo-" + \
            #                                  self.model.name)
            return True
        except Exception:
            # if any error occurred with new model, ensure no initialize occurs and raise previous error
            # if any error occurred with existing model, ensure no updates were made, raise previous error
            if is_new_model:
                self.cleanup()
            else:
                self._model = self.dal.model.update({
                    "id": old_model.id,
                    "name": old_model.name,
                    "description": old_model.description
                })
            raise

    def cleanup(self):
        """Cleans the project structure completely

        Notes
        -----
        This function will not error out but will gracefully exit, since
        it is used in cases where init fails as a check against mid-initialized
        projects

        Returns
        -------
        bool
            always True; failures of the individual steps are logged as
            warnings
        """
        if not self.is_initialized:
            self.logger.warning(
                __("warn", "controller.project.cleanup.not_init"))

        # Bind image_id up front: if the lookup below fails (for example
        # because there is no model yet), the removal step at the end must
        # see "no image" instead of hitting an unbound local variable and
        # logging a second, misleading environment warning
        image_id = None
        # Remove Datmo environment_driver references, give warning if error
        try:
            # Obtain image id before cleaning up if exists
            images = self.environment_driver.list_images(name="datmo-" + \
                                                              self.model.name)
            image_id = images[0].id if images else None
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))

        # Remove Datmo code_driver references, give warning if error
        try:
            if self.code_driver.is_initialized:
                for ref in self.code_driver.list_refs():
                    self.code_driver.delete_ref(ref)
        except Exception:
            self.logger.warning(__("warn", "controller.project.cleanup.code"))
        try:
            # Remove Hidden Datmo file structure, give warning if error
            self.file_driver.delete_hidden_datmo_file_structure()
        except (FileIOError, PathDoesNotExist):
            self.logger.warning(__("warn", "controller.project.cleanup.files"))

        try:
            if image_id:
                # Remove image created during init
                self.environment_driver.remove_image(
                    image_id_or_name=image_id, force=True)

                # Remove any dangling images (optional)

                # Stop and remove all running environments with image_id
                self.environment_driver.stop_remove_containers_by_term(
                    image_id, force=True)
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))

        return True

    def status(self):
        """Return the project status information if initialized

        Returns
        -------
        status_dict : dict
            dictionary with project metadata and config
        current_snapshot : datmo.core.entity.snapshot.Snapshot
            snapshot object of the current state of the repo if present else None
        latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated by the user if present else None
        latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated automatically by datmo if present else None
        unstaged_code : bool
            True if code has unstaged changes
        unstaged_environment : bool
            True if environment has unstaged changes
        unstaged_files : bool
            True if files have unstaged changes

        Raises
        ------
        ProjectNotInitialized
            if the project has not been initialized
        """
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.project.status"))
        # TODO: Add in note when environment is not setup or initialized

        # Add in project metadata
        status_dict = self.model.to_dictionary().copy()

        # Find all project settings
        status_dict["config"] = self.config_store.to_dict()

        # Find the latest snapshot generated by the user
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": True
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_user_generated = descending_snapshots[
            0] if descending_snapshots else None

        # Show the latest snapshot generated automatically by datmo
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": False
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_auto_generated = descending_snapshots[
            0] if descending_snapshots else None

        # TODO: add in latest run

        # For each area below, an UnstagedChanges exception raised by the
        # check is reported as unstaged == True
        self.code_controller = CodeController()
        try:
            unstaged_code = self.code_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_code = True

        self.environment_controller = EnvironmentController()
        try:
            unstaged_environment = self.environment_controller.check_unstaged_changes(
            )
        except UnstagedChanges:
            unstaged_environment = True

        self.file_collection_controller = FileCollectionController()
        try:
            unstaged_files = self.file_collection_controller.check_unstaged_changes(
            )
        except UnstagedChanges:
            unstaged_files = True

        # If exists, obtain the current snapshot, if unstaged changes, will be None
        self.snapshot_controller = SnapshotController()
        try:
            current_snapshot = self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            current_snapshot = None

        return status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, \
               unstaged_code, unstaged_environment, unstaged_files
Ejemplo n.º 29
0
    def status(self):
        """Collect a summary of the project state.

        Returns
        -------
        status_dict : dict
            project metadata with the project settings under the "config" key
        current_snapshot : datmo.core.entity.snapshot.Snapshot
            snapshot matching the current state of the repo, or None when
            there are unstaged changes
        latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
            most recently created visible snapshot, or None if there is none
        latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
            most recently created non-visible snapshot, or None if there is none
        unstaged_code : bool
            True if code has unstaged changes
        unstaged_environment : bool
            True if environment has unstaged changes
        unstaged_files : bool
            True if files have unstaged changes

        Raises
        ------
        ProjectNotInitialized
            if the project has not been initialized
        """
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.project.status"))
        # TODO: Add in note when environment is not setup or initialized

        # Project metadata first, then every stored project setting
        status_dict = self.model.to_dictionary().copy()
        status_dict["config"] = self.config_store.to_dict()

        # Visible snapshots (the user generated ones), newest first
        visible_newest_first = self.dal.snapshot.query(
            {"visible": True},
            sort_key="created_at",
            sort_order="descending")
        if visible_newest_first:
            latest_snapshot_user_generated = visible_newest_first[0]
        else:
            latest_snapshot_user_generated = None

        # Non-visible snapshots (generated automatically by datmo), newest first
        hidden_newest_first = self.dal.snapshot.query(
            {"visible": False},
            sort_key="created_at",
            sort_order="descending")
        if hidden_newest_first:
            latest_snapshot_auto_generated = hidden_newest_first[0]
        else:
            latest_snapshot_auto_generated = None

        # TODO: add in latest run

        # For code, environment and files an UnstagedChanges exception from
        # the check is reported as True
        self.code_controller = CodeController()
        try:
            code_check = self.code_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_code = True
        else:
            unstaged_code = code_check

        self.environment_controller = EnvironmentController()
        try:
            environment_check = \
                self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_environment = True
        else:
            unstaged_environment = environment_check

        self.file_collection_controller = FileCollectionController()
        try:
            files_check = \
                self.file_collection_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_files = True
        else:
            unstaged_files = files_check

        # The current snapshot only exists when nothing is unstaged
        self.snapshot_controller = SnapshotController()
        try:
            current_snapshot = self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            current_snapshot = None

        return (status_dict, current_snapshot,
                latest_snapshot_user_generated,
                latest_snapshot_auto_generated, unstaged_code,
                unstaged_environment, unstaged_files)
Ejemplo n.º 30
0
    def test_stop_success(self):
        """Run an environment and stop it by run id, by match string and with ``all=True``.

        Each of the three ``stop`` variants is expected to return a truthy
        value. The environment created for the test is deleted at the end.
        """
        # TODO: test more run options
        # 1) Test run_id input to stop
        # 2) Test match_string input to stop
        # 3) Test all input to stop
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition (a minimal Dockerfile in the
        # environment controller's home)
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # Options for the first runs; the name starts with "datmo-task-" plus
        # the model id, which the match_string stop below relies on
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment_controller.model.id +
            "-" + "test",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Create environment definition under the project home, with a RUN
        # line that echoes a freshly generated uuid
        # NOTE(review): if the project home and the environment controller
        # home are the same directory, this overwrites the Dockerfile written
        # above -- confirm
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)

        # Log file path handed to every run() call below
        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # 1) Test option 1: stop using the run id returned by run()

        _, run_id, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment_controller.stop(run_id=run_id)

        assert return_code

        # 2) Test option 2: stop using a match string built from the model id
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment_controller.stop(
            match_string="datmo-task-" + self.environment_controller.model.id)

        assert return_code

        # 3) Test option 3: start two runs with different names, then stop
        # with all=True
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        run_options_2 = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment_controller.model.id +
            "-" + "test2",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options_2, log_filepath)
        return_code = self.environment_controller.stop(all=True)

        assert return_code

        # teardown: delete the environment created for this test
        self.environment_controller.delete(environment_obj.id)