Ejemplo n.º 1
0
 def check_and_report_finished(run_state):
     """Record the container's completion status on the run state.

     Queries ``docker_utils.check_finished`` for ``run_state.container`` and
     stores the answer under the ``finished``, ``exitcode`` and
     ``failure_message`` keys of ``run_state.info``.  If the query raises
     ``docker_utils.DockerException`` the traceback is printed and the run
     is recorded as not finished, with no exit code or failure message.

     ``run_state.info`` is updated in place; the returned state carries that
     same mapping.
     """
     try:
         status = docker_utils.check_finished(run_state.container)
     except docker_utils.DockerException:
         traceback.print_exc()
         # Could not query Docker: report "still running, nothing known".
         status = (False, None, None)
     finished, exitcode, failure_msg = status
     run_state.info.update(
         {'finished': finished, 'exitcode': exitcode, 'failure_message': failure_msg}
     )
     return run_state._replace(info=run_state.info)
Ejemplo n.º 2
0
 def check_and_report_finished(run_state):
     """Refresh ``finished`` / ``exitcode`` / ``failure_message`` in the info.

     The values come from ``docker_utils.check_finished`` called on
     ``run_state.container``.  When that call raises
     ``docker_utils.DockerException`` the traceback is printed and the
     fallback values ``False``, ``None``, ``None`` are stored instead.

     The ``info`` mapping of the given state is modified in place and the
     returned state refers to that same mapping.
     """
     # Fallback values, kept when Docker cannot be queried.
     finished, exitcode, failure_msg = False, None, None
     try:
         finished, exitcode, failure_msg = docker_utils.check_finished(run_state.container)
     except docker_utils.DockerException:
         traceback.print_exc()

     info = run_state.info
     info.update({
         'finished': finished,
         'exitcode': exitcode,
         'failure_message': failure_msg,
     })
     return run_state._replace(info=info)
Ejemplo n.º 3
0
    def _transition_from_CLEANING_UP(self, run_state):
        """
        1- delete the container if still existent (waits until Docker
            reports it finished, polling once per second)
        2- release the dependencies in dependency manager
        3- clean up the dependencies from bundle folder
        4- If bundle has contents to upload (i.e. was RUNNING at some point),
            move to UPLOADING_RESULTS state
           Otherwise hand the run over to finalize_run

        Returns the run state for the next stage.
        """
        bundle_uuid = run_state.bundle['uuid']
        if run_state.container_id is not None:
            while True:
                try:
                    finished, _, _ = docker_utils.check_finished(
                        run_state.container)
                    if finished:
                        run_state.container.remove(force=True)
                        break
                except docker.errors.APIError:
                    traceback.print_exc()
                # Pause between attempts both after an API error and while
                # the container is simply not finished yet; without this the
                # "not finished" case would poll Docker in a tight loop.
                time.sleep(1)

        for dep in run_state.bundle['dependencies']:
            self.dependency_manager.release(
                bundle_uuid, (dep['parent_uuid'], dep['parent_path']))

            child_path = os.path.join(run_state.bundle_path, dep['child_path'])
            try:
                remove_path(child_path)
            except Exception:
                # Best effort: a path that cannot be removed must not stop
                # the remaining dependencies from being released.
                traceback.print_exc()

        if run_state.has_contents:
            return run_state._replace(
                stage=LocalRunStage.UPLOADING_RESULTS,
                run_status='Uploading results',
                container=None,
            )
        else:
            return self.finalize_run(run_state)
Ejemplo n.º 4
0
    def _transition_from_CLEANING_UP(self, run_state):
        """
        1- delete the container if still existent; the container is only
            removed once Docker reports it finished, re-checking every second
        2- release the dependencies in dependency manager
        3- clean up the dependencies from bundle folder
        4- If bundle has contents to upload (i.e. was RUNNING at some point),
            move to UPLOADING_RESULTS state
           Otherwise return whatever finalize_run produces for the run

        Returns the run state for the next stage.
        """
        bundle_uuid = run_state.bundle['uuid']
        if run_state.container_id is not None:
            while True:
                try:
                    finished, _, _ = docker_utils.check_finished(run_state.container)
                    if finished:
                        run_state.container.remove(force=True)
                        break
                except docker.errors.APIError:
                    traceback.print_exc()
                # Back off before the next attempt. This covers API errors
                # and the "still not finished" case alike, so the loop never
                # spins against the Docker daemon without a delay.
                time.sleep(1)

        for dep in run_state.bundle['dependencies']:
            self.dependency_manager.release(bundle_uuid, (dep['parent_uuid'], dep['parent_path']))

            child_path = os.path.join(run_state.bundle_path, dep['child_path'])
            try:
                remove_path(child_path)
            except Exception:
                # Best effort: keep going with the other dependencies even if
                # this path could not be deleted.
                traceback.print_exc()

        if run_state.has_contents:
            return run_state._replace(
                stage=LocalRunStage.UPLOADING_RESULTS,
                run_status='Uploading results',
                container=None,
            )
        else:
            return self.finalize_run(run_state)
Ejemplo n.º 5
0
    def _transition_from_RUNNING(self, run_state):
        """
        1- Check run status of the docker container
        2- Update time/memory/disk usage and mark the run as killed when a
            requested limit is exceeded
        3- If run is killed, kill the container
        4- If run is killed or finished, move to CLEANING_UP state
           Otherwise return the updated state unchanged in stage

        Returns the (possibly updated) run state.
        """
        bundle_uuid = run_state.bundle['uuid']

        def check_and_report_finished(run_state):
            # Store the container's finished/exitcode/failure_message in
            # run_state.info (updated in place). A DockerException is logged
            # and treated as "not finished yet".
            try:
                finished, exitcode, failure_msg = docker_utils.check_finished(
                    run_state.container)
            except docker_utils.DockerException:
                traceback.print_exc()
                finished, exitcode, failure_msg = False, None, None
            new_info = dict(finished=finished,
                            exitcode=exitcode,
                            failure_message=failure_msg)
            run_state.info.update(new_info)
            run_state = run_state._replace(info=run_state.info)
            return run_state

        def check_resource_utilization(run_state):
            # Refresh time_used, max_memory and disk_utilization, then collect
            # one message per exceeded limit. Any message marks the run killed.
            kill_messages = []

            run_stats = docker_utils.get_container_stats(run_state.container)
            time_used = time.time() - run_state.start_time

            run_state = run_state._replace(time_used=time_used)
            run_state = run_state._replace(max_memory=max(
                run_state.max_memory, run_stats.get('memory', 0)))
            run_state = run_state._replace(
                disk_utilization=self.disk_utilization[bundle_uuid]
                ['disk_utilization'])

            # A falsy request_time means no time limit is enforced.
            if (run_state.resources['request_time'] and
                    run_state.time_used > run_state.resources['request_time']):
                kill_messages.append(
                    'Time limit %s exceeded.' %
                    duration_str(run_state.resources['request_time']))

            if run_state.max_memory > run_state.resources['request_memory']:
                # request_memory is a size, not a duration, so it is formatted
                # with size_str just like the disk limit below.
                kill_messages.append(
                    'Memory limit %s exceeded.' %
                    size_str(run_state.resources['request_memory']))

            # A falsy request_disk means no disk limit is enforced.
            if (run_state.resources['request_disk']
                    and run_state.disk_utilization >
                    run_state.resources['request_disk']):
                kill_messages.append(
                    'Disk limit %sb exceeded.' %
                    size_str(run_state.resources['request_disk']))

            if kill_messages:
                new_info = run_state.info
                new_info['kill_message'] = ' '.join(kill_messages)
                run_state = run_state._replace(info=new_info, is_killed=True)

            return run_state

        def check_disk_utilization():
            # Thread target: repeatedly measure the bundle folder and publish
            # the size in self.disk_utilization[bundle_uuid] until that
            # entry's 'running' flag is cleared.
            running = True
            while running:
                start_time = time.time()
                try:
                    disk_utilization = get_path_size(run_state.bundle_path)
                    self.disk_utilization[bundle_uuid][
                        'disk_utilization'] = disk_utilization
                    running = self.disk_utilization[bundle_uuid]['running']
                except Exception:
                    traceback.print_exc()
                end_time = time.time()

                # To ensure that we don't hammer the disk for this computation when
                # there are lots of files, we run it at most 10% of the time.
                time.sleep(max((end_time - start_time) * 10, 1.0))

        self.disk_utilization.add_if_new(
            bundle_uuid,
            threading.Thread(target=check_disk_utilization, args=[]))
        run_state = check_and_report_finished(run_state)
        run_state = check_resource_utilization(run_state)

        if run_state.is_killed and run_state.container_id is not None:
            try:
                run_state.container.kill()
            except docker.errors.APIError:
                finished, _, _ = docker_utils.check_finished(
                    run_state.container)
                if not finished:
                    # If we can't kill a Running container, something is wrong
                    # Otherwise all well
                    traceback.print_exc()
            # Tell the disk-measuring thread to stop and drop its entry.
            self.disk_utilization[bundle_uuid]['running'] = False
            self.disk_utilization.remove(bundle_uuid)
            # NOTE(review): container_id is cleared here, so a later step that
            # guards container removal on container_id would skip the killed
            # container -- confirm this is intended.
            return run_state._replace(stage=LocalRunStage.CLEANING_UP,
                                      container_id=None)
        if run_state.info['finished']:
            logger.debug(
                'Finished run with UUID %s, exitcode %s, failure_message %s',
                bundle_uuid,
                run_state.info['exitcode'],
                run_state.info['failure_message'],
            )
            # Tell the disk-measuring thread to stop and drop its entry.
            self.disk_utilization[bundle_uuid]['running'] = False
            self.disk_utilization.remove(bundle_uuid)
            return run_state._replace(stage=LocalRunStage.CLEANING_UP,
                                      run_status='Uploading results')
        else:
            return run_state
Ejemplo n.º 6
0
    def _transition_from_RUNNING(self, run_state):
        """
        Advance a run that is currently in the RUNNING stage.

        1- Refresh the container's finished / exitcode / failure_message info
        2- Refresh time, memory and disk usage; flag the run as killed when a
           requested limit is exceeded
        3- If the run is killed, kill the container and move to CLEANING_UP
        4- If the run is finished, move to CLEANING_UP state
           Otherwise return the refreshed state, still in RUNNING
        """
        bundle_uuid = run_state.bundle['uuid']

        def check_and_report_finished(run_state):
            # Write the container status into run_state.info (in place).
            # A DockerException is printed and treated as "not finished".
            try:
                status = docker_utils.check_finished(run_state.container)
            except docker_utils.DockerException:
                traceback.print_exc()
                status = (False, None, None)
            finished, exitcode, failure_msg = status
            run_state.info.update(
                {'finished': finished, 'exitcode': exitcode, 'failure_message': failure_msg}
            )
            return run_state._replace(info=run_state.info)

        def check_resource_utilization(run_state):
            # Update the usage fields, then build one message per breached
            # limit; any message marks the run as killed.
            run_stats = docker_utils.get_container_stats(run_state.container)
            run_state = run_state._replace(
                time_used=time.time() - run_state.start_time,
                max_memory=max(run_state.max_memory, run_stats.get('memory', 0)),
                disk_utilization=self.disk_utilization[bundle_uuid]['disk_utilization'],
            )

            resources = run_state.resources
            kill_messages = []

            # A falsy request_time disables the time check.
            time_limit = resources['request_time']
            if time_limit and run_state.time_used > time_limit:
                kill_messages.append('Time limit %s exceeded.' % duration_str(time_limit))

            # Exit code '137' is also reported as a memory-limit breach.
            memory_limit = resources['request_memory']
            if (
                run_state.max_memory > memory_limit
                or run_state.info.get('exitcode', '0') == '137'
            ):
                kill_messages.append('Memory limit %s exceeded.' % size_str(memory_limit))

            # A falsy request_disk disables the disk check.
            disk_limit = resources['request_disk']
            if disk_limit and run_state.disk_utilization > disk_limit:
                kill_messages.append('Disk limit %sb exceeded.' % size_str(disk_limit))

            if not kill_messages:
                return run_state

            info = run_state.info
            info['kill_message'] = ' '.join(kill_messages)
            return run_state._replace(info=info, is_killed=True)

        def check_disk_utilization():
            # Thread target: keep publishing the bundle folder's size until
            # the entry's 'running' flag is switched off.
            keep_polling = True
            while keep_polling:
                began = time.time()
                try:
                    self.disk_utilization[bundle_uuid]['disk_utilization'] = get_path_size(
                        run_state.bundle_path
                    )
                    keep_polling = self.disk_utilization[bundle_uuid]['running']
                except Exception:
                    traceback.print_exc()
                elapsed = time.time() - began

                # Sleep ten times as long as the measurement took (at least one
                # second) so that measuring uses at most ~10% of the time, even
                # when the bundle holds lots of files.
                time.sleep(max(elapsed * 10, 1.0))

        def stop_disk_monitor():
            # Ask the measuring thread to stop, then drop its entry.
            self.disk_utilization[bundle_uuid]['running'] = False
            self.disk_utilization.remove(bundle_uuid)

        self.disk_utilization.add_if_new(
            bundle_uuid, threading.Thread(target=check_disk_utilization, args=[])
        )
        run_state = check_and_report_finished(run_state)
        run_state = check_resource_utilization(run_state)

        if run_state.is_killed:
            try:
                run_state.container.kill()
            except docker.errors.APIError:
                already_finished, _, _ = docker_utils.check_finished(run_state.container)
                if not already_finished:
                    # Failing to kill a container that is still running means
                    # something is wrong; a finished container needs no kill.
                    traceback.print_exc()
            stop_disk_monitor()
            return run_state._replace(stage=LocalRunStage.CLEANING_UP)

        if not run_state.info['finished']:
            return run_state

        logger.debug(
            'Finished run with UUID %s, exitcode %s, failure_message %s',
            bundle_uuid,
            run_state.info['exitcode'],
            run_state.info['failure_message'],
        )
        stop_disk_monitor()
        return run_state._replace(
            stage=LocalRunStage.CLEANING_UP, run_status='Uploading results'
        )