Esempio n. 1
0
    def create_base(self):
        """Build the data providers, fetch the task (with its DAG eagerly
        joined) and derive the worker's runtime attributes from it."""
        self.info('create_base')

        session = self.session
        self.provider = TaskProvider(session)
        self.library_provider = DagLibraryProvider(session)
        self.storage = Storage(session)

        task = self.provider.by_id(
            self.id, joinedload(Task.dag_rel, innerjoin=True))
        if not task:
            raise Exception(f'task with id = {self.id} is not found')
        self.task = task

        self.dag = task.dag_rel
        self.executor = None
        self.hostname = socket.gethostname()
        self.docker_img = DOCKER_IMG
        self.worker_index = os.getenv('WORKER_INDEX', -1)

        # Personal queue name: <hostname>_<docker image>_<worker index>
        self.queue_personal = '_'.join(
            str(part)
            for part in (self.hostname, self.docker_img, self.worker_index))

        self.config = Config.from_yaml(self.dag.config)
        executors = self.config['executors']
        self.executor_type = executors[task.executor]['type']
Esempio n. 2
0
    def create_base(self):
        """Prepare the task run: acknowledge/revoke the celery wrapper task,
        build providers, fetch the task with its DAG, seed RNGs and export
        the executor's environment variables (including GPU mapping).

        Raises:
            Exception: if no task with ``self.id`` exists.
        """
        self.info('create_base')

        # The celery wrapper is marked SUCCESS and revoked; the real work
        # continues in this process.
        if app.current_task:
            app.current_task.update_state(state=states.SUCCESS)
            app.control.revoke(app.current_task.request.id, terminate=True)

        self.provider = TaskProvider(self.session)
        self.library_provider = DagLibraryProvider(self.session)
        self.storage = Storage(self.session)

        self.task = self.provider.by_id(
            self.id, joinedload(Task.dag_rel, innerjoin=True))
        if not self.task:
            raise Exception(f'task with id = {self.id} is not found')

        self.dag = self.task.dag_rel
        self.executor = None
        self.hostname = socket.gethostname()

        self.docker_img = DOCKER_IMG
        # NOTE(review): default is int -1 while an env value is str; both
        # stringify identically in the queue name below — confirm nothing
        # relies on the type.
        self.worker_index = os.getenv('WORKER_INDEX', -1)

        self.queue_personal = f'{self.hostname}_{self.docker_img}_' \
                              f'{self.worker_index}'

        self.config = Config.from_yaml(self.dag.config)

        set_global_seed(self.config['info'].get('seed', 0))

        self.executor_type = self.config['executors'][
            self.task.executor]['type']

        executor = self.config['executors'][self.task.executor]

        if os.getenv('CUDA_VISIBLE_DEVICES', '').strip() != '':
            cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES',
                                             '').split(',')
            # Map the task's logical GPU indexes onto the physical ids in
            # CUDA_VISIBLE_DEVICES.
            # BUG FIX: skip empty entries — ''.split(',') yields [''] and
            # int('') raised ValueError when gpu_assigned was None or ''.
            self.task.gpu_assigned = ','.join([
                cuda_visible_devices[int(g)]
                for g in (self.task.gpu_assigned or '').split(',')
                if g.strip() != ''
            ])
            cuda_visible_devices = self.task.gpu_assigned
        else:
            cuda_visible_devices = self.task.gpu_assigned

        cuda_visible_devices = cuda_visible_devices or ''

        env = {
            'MKL_NUM_THREADS': 1,
            'OMP_NUM_THREADS': 1,
            'CUDA_VISIBLE_DEVICES': cuda_visible_devices
        }
        env.update(executor.get('env', {}))

        for k, v in env.items():
            os.environ[k] = str(v)
            self.info(f'Set env. {k} = {v}')
Esempio n. 3
0
    def __init__(self, session: Session, logger=None,
                 component: ComponentType = None,
                 max_file_size: int = 10 ** 5, max_count=10 ** 3):
        """Create the data-access providers (all sharing one session) and
        remember the logging/limit settings."""
        provider_specs = (
            ('file_provider', FileProvider),
            ('provider', DagStorageProvider),
            ('task_provider', TaskProvider),
            ('library_provider', DagLibraryProvider),
            ('dag_provider', DagProvider),
        )
        for attr, provider_cls in provider_specs:
            setattr(self, attr, provider_cls(session))

        self.logger = logger
        self.component = component
        self.max_file_size = max_file_size
        self.max_count = max_count
Esempio n. 4
0
    def create_base(self):
        """Revoke the current celery task, build providers, fetch the task
        with its DAG and export the executor's environment variables."""
        self.info('create_base')

        if app.current_task:
            app.current_task.update_state(state=states.SUCCESS)
            app.control.revoke(app.current_task.request.id, terminate=True)

        session = self.session
        self.provider = TaskProvider(session)
        self.library_provider = DagLibraryProvider(session)
        self.storage = Storage(session)

        task = self.provider.by_id(
            self.id, joinedload(Task.dag_rel, innerjoin=True))
        if not task:
            raise Exception(f'task with id = {self.id} is not found')
        self.task = task

        self.dag = task.dag_rel
        self.executor = None
        self.hostname = socket.gethostname()
        self.docker_img = DOCKER_IMG
        self.worker_index = os.getenv('WORKER_INDEX', -1)

        # Personal queue name: <hostname>_<docker image>_<worker index>
        self.queue_personal = '_'.join(
            str(part)
            for part in (self.hostname, self.docker_img, self.worker_index))

        self.config = Config.from_yaml(self.dag.config)
        executors = self.config['executors']
        self.executor_type = executors[task.executor]['type']

        executor_config = executors[task.executor]
        env = dict(MKL_NUM_THREADS=1, OMP_NUM_THREADS=1)
        env.update(executor_config.get('env', {}))

        for key, value in env.items():
            os.environ[key] = str(value)
            self.info(f'Set env. {key} = {value}')
Esempio n. 5
0
class ExecuteBuilder:
    """Runs a single task end to end on a worker.

    Lifecycle (see :meth:`build`): fetch the task and its DAG from the DB,
    check/change its status, download its code, import and instantiate the
    executor, run it, and report success/failure back to the DB.
    """

    def __init__(self, id: int, repeat_count: int = 1, exit=True):
        """
        Args:
            id: primary key of the task to execute.
            repeat_count: remaining re-queue attempts after a dependency
                installation forces a restart.
            exit: if True, the process hard-exits after :meth:`build`.
        """
        self.session = Session.create_session(key='ExecuteBuilder')
        self.id = id
        self.repeat_count = repeat_count
        self.logger = create_logger(self.session, 'ExecuteBuilder')
        # Secondary logger without console output, passed to the executor.
        self.logger_db = create_logger(self.session,
                                       'ExecuteBuilder.db',
                                       console=False)
        self.exit = exit

        # All of the following are populated by create_base().
        self.provider = None
        self.library_provider = None
        self.storage = None
        self.task = None
        self.dag = None
        self.executor = None
        self.hostname = None
        self.docker_img = None
        self.worker_index = None
        self.queue_personal = None
        self.config = None
        self.executor_type = None

    def info(self, msg: str, step=None):
        """Log *msg* at INFO level, tagged with worker host and task id."""
        self.logger.info(msg, ComponentType.Worker, self.hostname, self.id,
                         step)

    def error(self, msg: str, step=None):
        """Log *msg* at ERROR level, tagged with worker host and task id."""
        self.logger.error(msg, ComponentType.Worker, self.hostname, self.id,
                          step)

    def warning(self, msg: str, step=None):
        """Log *msg* at WARNING level, tagged with worker host and task id."""
        self.logger.warning(msg, ComponentType.Worker, self.hostname, self.id,
                            step)

    def debug(self, msg: str, step=None):
        """Log *msg* at DEBUG level, tagged with worker host and task id."""
        self.logger.debug(msg, ComponentType.Worker, self.hostname, self.id,
                          step)

    def create_base(self):
        """Prepare the run: revoke the celery wrapper task, build providers,
        fetch the task with its DAG, seed RNGs and export the executor's
        environment variables (including the GPU mapping).

        Raises:
            Exception: if no task with ``self.id`` exists.
        """
        self.info('create_base')

        # Mark the celery wrapper done and revoke it; the real work
        # continues in this process.
        if app.current_task:
            app.current_task.update_state(state=states.SUCCESS)
            app.control.revoke(app.current_task.request.id, terminate=True)

        self.provider = TaskProvider(self.session)
        self.library_provider = DagLibraryProvider(self.session)
        self.storage = Storage(self.session)

        self.task = self.provider.by_id(
            self.id, joinedload(Task.dag_rel, innerjoin=True))
        if not self.task:
            raise Exception(f'task with id = {self.id} is not found')

        self.dag = self.task.dag_rel
        self.executor = None
        self.hostname = socket.gethostname()

        self.docker_img = DOCKER_IMG
        self.worker_index = os.getenv('WORKER_INDEX', -1)

        self.queue_personal = f'{self.hostname}_{self.docker_img}_' \
                              f'{self.worker_index}'

        self.config = Config.from_yaml(self.dag.config)

        set_global_seed(self.config['info'].get('seed', 0))

        self.executor_type = self.config['executors'][
            self.task.executor]['type']

        executor = self.config['executors'][self.task.executor]

        cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
        self.info(f'Env.before execution '
                  f'CUDA_VISIBLE_DEVICES={cuda_visible_devices}')

        if cuda_visible_devices.strip() != '':
            gpu_assigned = self.task.gpu_assigned or ''

            # Map the task's logical GPU indexes onto the physical ids
            # listed in CUDA_VISIBLE_DEVICES; empty entries are skipped.
            cuda_visible_devices = cuda_visible_devices.split(',')
            cuda_visible_devices = ','.join([
                cuda_visible_devices[int(g)] for g in gpu_assigned.split(',')
                if g.strip() != ''
            ])
        else:
            cuda_visible_devices = self.task.gpu_assigned

        cuda_visible_devices = cuda_visible_devices or ''

        env = {
            'MKL_NUM_THREADS': 1,
            'OMP_NUM_THREADS': 1,
            'CUDA_VISIBLE_DEVICES': cuda_visible_devices
        }
        env.update(executor.get('env', {}))

        for k, v in env.items():
            os.environ[k] = str(v)
            self.info(f'Set env. {k} = {v}')

    def check_status(self):
        """Return True if the task is already running/finished and must not
        be executed again; None otherwise."""
        self.info('check_status')

        assert self.dag is not None, 'You must fetch task with dag_rel'

        if self.task.status >= TaskStatus.InProgress.value:
            msg = f'Task = {self.task.id}. Status = {self.task.status}, ' \
                  f'before the execute_by_id invocation.'
            if app.current_task:
                msg += f' Request Id = {app.current_task.request.id}'
            self.error(msg)
            return True

    def change_status(self):
        """Record this worker's identity on the task and mark it InProgress."""
        self.info('change_status')

        self.task.computer_assigned = self.hostname
        self.task.pid = os.getpid()
        self.task.worker_index = self.worker_index
        self.task.docker_assigned = self.docker_img
        self.provider.change_status(self.task, TaskStatus.InProgress)

    def download(self):
        """Fetch the task's code, import its executor class and, if a
        dependency installation happened, re-queue the task and exit."""
        self.info('download')

        # In debug mode the code is run from the current directory instead
        # of a downloaded task folder.
        if not self.task.debug:
            folder = self.storage.download(task=self.id)
        else:
            folder = os.getcwd()

        os.chdir(folder)

        libraries = self.library_provider.dag(self.task.dag)
        executor_type = self.executor_type

        self.info('download. folder changed')

        # Built-in executors are searched first, then the task's own code.
        mlcomp_executors_folder = join(dirname(abspath(__file__)), 'executors')
        mlcomp_base_folder = os.path.abspath(
            join(mlcomp_executors_folder, '../../../'))

        imported, was_installation = self.storage.import_executor(
            mlcomp_executors_folder, mlcomp_base_folder, executor_type)

        if not imported:
            imported, was_installation = self.storage.import_executor(
                folder, folder, executor_type, libraries)

            if not imported:
                raise Exception(f'Executor = {executor_type} not found')

        self.info('download. executor imported')

        if was_installation and not self.task.debug:
            if self.repeat_count > 0:
                # Freshly installed libraries only take effect in a new
                # process: put the task back in the queue and exit.
                self.info('was installation. '
                          'set task status to Queued. '
                          'And resending the task to a queue')
                self.task.status = TaskStatus.Queued.value
                self.provider.commit()

                try:
                    execute.apply_async((self.id, self.repeat_count - 1),
                                        queue=self.queue_personal,
                                        retry=False)
                except Exception:
                    pass
                finally:
                    sys.exit()

        assert Executor.is_registered(executor_type), \
            f'Executor {executor_type} was not found'

    def create_executor(self):
        """Instantiate the executor from the DAG config."""
        self.info('create_executor')

        additional_info = yaml_load(self.task.additional_info) \
            if self.task.additional_info else dict()
        self.executor = Executor.from_config(executor=self.task.executor,
                                             config=self.config,
                                             additional_info=additional_info,
                                             session=self.session,
                                             logger=self.logger,
                                             logger_db=self.logger_db)

    def execute(self):
        """Run the executor; on a multi-stage result, re-queue the task for
        the next stage, otherwise mark it Success."""
        self.info('execute start')

        res = self.executor(task=self.task,
                            task_provider=self.provider,
                            dag=self.dag)
        self.info('execute executor finished')

        res = res or {}
        self.task.result = yaml_dump(res)
        self.provider.commit()

        if 'stage' in res and 'stages' in res:
            index = res['stages'].index(res['stage'])
            if index < len(res['stages']) - 1:
                self.executor.info(f'stage = {res["stage"]} done. '
                                   f'Go to the stage = '
                                   f'{res["stages"][index + 1]}')

                time.sleep(3)

                self.executor.info(f'sending {(self.id, self.repeat_count)} '
                                   f'to {self.queue_personal}')

                self.task.status = TaskStatus.Queued.value
                self.provider.commit()

                execute.apply_async((self.id, self.repeat_count),
                                    queue=self.queue_personal,
                                    retry=False)
                return

        self.executor.step.finish()
        self.provider.change_status(self.task, TaskStatus.Success)

        self.info('execute end')

    def build(self):
        """Full pipeline; on any failure, mark the task Failed and re-raise."""
        try:
            self.create_base()

            bad_status = self.check_status()
            if bad_status:
                return

            self.change_status()

            self.download()

            self.create_executor()

            self.execute()

        except Exception as e:
            step = self.executor.step.id if \
                (self.executor and self.executor.step) else None

            # A broken DB connection makes the error report itself fail;
            # recreate the session first.
            if Session.sqlalchemy_error(e):
                Session.cleanup(key='ExecuteBuilder')
                self.session = Session.create_session(key='ExecuteBuilder')
                # NOTE(review): this assigns a new logger to
                # logger.session — possibly meant `self.logger = ...`;
                # confirm against the Logger implementation.
                self.logger.session = create_logger(self.session,
                                                    'ExecuteBuilder')

            self.error(traceback.format_exc(), step)
            # BUG FIX: if create_base() failed before the task was fetched,
            # self.task is still None and `self.task.status` raised an
            # AttributeError that masked the original exception.
            if self.task is not None and \
                    self.task.status <= TaskStatus.InProgress.value:
                self.provider.change_status(self.task, TaskStatus.Failed)
            raise e
        finally:
            if app.current_task:
                app.close()

            if self.exit:
                # noinspection PyProtectedMember
                os._exit(0)
Esempio n. 6
0
class Storage:
    """Stores and retrieves DAG code files in the database.

    Files are deduplicated by MD5 per project; directory layout is recorded
    in DagStorage rows so a task's code tree can be rebuilt on any worker.
    """

    def __init__(self,
                 session: Session,
                 logger=None,
                 component: ComponentType = None,
                 max_file_size: int = 10**5,
                 max_count=10**3):
        """
        Args:
            session: DB session shared by all providers.
            logger: optional logger; when None, log_info is a no-op.
            component: component tag attached to log messages.
            max_file_size: per-file upload limit in bytes (0/None disables).
            max_count: upload file-count limit (0/None disables).
        """
        self.file_provider = FileProvider(session)
        self.provider = DagStorageProvider(session)
        self.task_provider = TaskProvider(session)
        self.library_provider = DagLibraryProvider(session)
        self.dag_provider = DagProvider(session)

        self.logger = logger
        self.component = component
        self.max_file_size = max_file_size
        self.max_count = max_count

    def log_info(self, message: str):
        """Log *message* if a logger was supplied."""
        if self.logger:
            self.logger.info(message, self.component)

    def copy_from(self, src: int, dag: Dag):
        """Copy storage entries and library records from DAG *src* to *dag*.

        File contents are shared (only DagStorage/DagLibrary rows are new).
        """
        storages = self.provider.query(DagStorage). \
            filter(DagStorage.dag == src). \
            all()
        libraries = self.library_provider.query(DagLibrary). \
            filter(DagLibrary.dag == src). \
            all()

        s_news = []
        for s in storages:
            s_new = DagStorage(dag=dag.id,
                               file=s.file,
                               path=s.path,
                               is_dir=s.is_dir)
            s_news.append(s_new)
        l_news = []
        for l in libraries:
            l_new = DagLibrary(dag=dag.id,
                               library=l.library,
                               version=l.version)
            l_news.append(l_new)

        self.provider.add_all(s_news)
        self.library_provider.add_all(l_news)

    def _build_spec(self, folder: str):
        """Build a gitignore-style PathSpec from *folder*/file.ignore.txt
        plus always-ignored patterns (logs, data, models, caches)."""
        ignore_file = os.path.join(folder, 'file.ignore.txt')
        if not os.path.exists(ignore_file):
            ignore_patterns = []
        else:
            ignore_patterns = read_lines(ignore_file)
        ignore_patterns.extend(
            ['log', '/data', '/models', '__pycache__', '*.ipynb'])

        return pathspec.PathSpec.from_lines(
            pathspec.patterns.GitWildMatchPattern, ignore_patterns)

    def upload(self, folder: str, dag: Dag, control_reqs: bool = True):
        """Upload *folder*'s content for *dag*, deduplicating files by MD5.

        Raises:
            Exception: when file count or a single file size exceeds the
                configured limits.
        """
        self.log_info('upload started')
        hashs = self.file_provider.hashs(dag.project)
        self.log_info('hashes are retrieved')

        all_files = []
        spec = self._build_spec(folder)

        # Top level is listed non-recursively; non-ignored directories are
        # then expanded recursively so ignored trees are never walked.
        files = glob(os.path.join(folder, '**'))
        for file in files[:]:
            path = os.path.relpath(file, folder)
            if spec.match_file(path) or path == '.':
                continue
            if os.path.isdir(file):
                # NOTE(review): `file` is already absolute, so join()
                # discards `folder` — works, but confirm intent.
                child_files = glob(os.path.join(folder, file, '**'),
                                   recursive=True)
                files.extend(child_files)

        if self.max_count and len(files) > self.max_count:
            raise Exception(f'files count = {len(files)} '
                            f'But max count = {self.max_count}')

        self.log_info('list of files formed')

        folders_to_add = []
        files_to_add = []
        files_storage_to_add = []

        total_size_added = 0

        for o in files:
            path = os.path.relpath(o, folder)
            if spec.match_file(path) or path == '.':
                continue

            if isdir(o):
                folder_to_add = DagStorage(dag=dag.id, path=path, is_dir=True)
                folders_to_add.append(folder_to_add)
                continue
            # FIX: close the file handle deterministically (was a bare
            # open(...).read()).
            with open(o, 'rb') as f:
                content = f.read()
            size = sys.getsizeof(content)
            if self.max_file_size and size > self.max_file_size:
                raise Exception(
                    f'file = {o} has size {size}.'
                    f' But max size is set to {self.max_file_size}')
            md5 = hashlib.md5(content).hexdigest()

            all_files.append(o)

            if md5 not in hashs:
                file = File(md5=md5,
                            content=content,
                            project=dag.project,
                            dag=dag.id,
                            created=now())
                hashs[md5] = file
                files_to_add.append(file)
                total_size_added += size

            file_storage = DagStorage(dag=dag.id,
                                      path=path,
                                      file=hashs[md5],
                                      is_dir=False)
            files_storage_to_add.append(file_storage)

        self.log_info('inserting DagStorage folders')

        if len(folders_to_add) > 0:
            self.provider.bulk_save_objects(folders_to_add)

        self.log_info('inserting Files')

        if len(files_to_add) > 0:
            # return_defaults makes the new File ids available below.
            self.file_provider.bulk_save_objects(files_to_add,
                                                 return_defaults=True)

        self.log_info('inserting DagStorage Files')

        if len(files_storage_to_add) > 0:
            for file_storage in files_storage_to_add:
                if isinstance(file_storage.file, File):
                    # noinspection PyUnresolvedReferences
                    file_storage.file = file_storage.file.id

            self.provider.bulk_save_objects(files_storage_to_add)

        dag.file_size += total_size_added

        self.dag_provider.update()

        if INSTALL_DEPENDENCIES and control_reqs:
            reqs = control_requirements(folder, files=all_files)
            for name, rel, version in reqs:
                self.library_provider.add(
                    DagLibrary(dag=dag.id, library=name, version=version))

    def download_dag(self, dag: int, folder: str):
        """Materialize DAG *dag*'s stored tree under *folder*."""
        os.makedirs(folder, exist_ok=True)

        items = self.provider.by_dag(dag)
        # Directories (file is None) first, so parent dirs exist.
        items = sorted(items, key=lambda x: x[1] is not None)
        for item, file in items:
            path = os.path.join(folder, item.path)
            if item.is_dir:
                os.makedirs(path, exist_ok=True)
            else:
                with open(path, 'wb') as f:
                    f.write(file.content)

    def download(self, task: int):
        """Download the code of *task*, symlink its data/models folders and
        put the folder on sys.path. Returns the task folder path."""
        task = self.task_provider.by_id(
            task, joinedload(Task.dag_rel, innerjoin=True))
        folder = join(TASK_FOLDER, str(task.id))
        self.download_dag(task.dag, folder)

        config = Config.from_yaml(task.dag_rel.config)
        info = config['info']
        try:
            data_folder = os.path.join(DATA_FOLDER, info['project'])
            os.makedirs(data_folder, exist_ok=True)

            os.symlink(data_folder,
                       os.path.join(folder, 'data'),
                       target_is_directory=True)
        except FileExistsError:
            pass

        try:
            model_folder = os.path.join(MODEL_FOLDER, info['project'])
            os.makedirs(model_folder, exist_ok=True)

            os.symlink(model_folder,
                       os.path.join(folder, 'models'),
                       target_is_directory=True)
        except FileExistsError:
            pass

        sys.path.insert(0, folder)
        return folder

    def import_executor(self,
                        folder: str,
                        base_folder: str,
                        executor: str,
                        libraries: List[Tuple] = None):
        """Find and import the module defining *executor* under *folder*.

        Installs pinned library versions when they differ from what is
        installed. Returns (imported, was_installation).
        """

        sys.path.insert(0, base_folder)

        spec = self._build_spec(folder)
        was_installation = False

        folders = [
            p for p in glob(f'{folder}/*', recursive=True)
            if os.path.isdir(p) and not spec.match_file(p)
        ]
        folders += [folder]
        library_names = set(n for n, v in (libraries or []))
        library_versions = {n: v for n, v in (libraries or [])}

        for n in library_names:
            try:
                version = pkg_resources.get_distribution(n).version
                need_install = library_versions[n] != version
            except Exception:
                # Not installed (or metadata unreadable): install it.
                need_install = True

            if INSTALL_DEPENDENCIES and need_install:
                os.system(f'pip install {n}=={library_versions[n]}')
                was_installation = True

        def is_valid_class(cls: pyclbr.Class):
            # Match by exact, lowercase or snake_case class name.
            return cls.name == executor or \
                   cls.name.lower() == executor or \
                   to_snake(cls.name) == executor

        def relative_name(path: str):
            # File path -> dotted module path relative to base_folder.
            rel = os.path.relpath(path, base_folder)
            parts = [str(p).split('.')[0] for p in rel.split(os.sep)]
            return '.'.join(parts)

        for (module_loader, module_name,
             ispkg) in pkgutil.iter_modules(folders):
            # NOTE(review): find_module is deprecated in favor of
            # find_spec — confirm target Python version before migrating.
            module = module_loader.find_module(module_name)
            rel_path = os.path.relpath(
                os.path.splitext(module.path)[0],
                base_folder).replace('/', '.')
            try:
                classes = pyclbr.readmodule(rel_path, path=[base_folder])
            except Exception:
                # Unparsable module: keep scanning the others.
                continue
            for k, v in classes.items():
                if is_valid_class(v):
                    importlib.import_module(relative_name(module.path))
                    return True, was_installation

        return False, was_installation
Esempio n. 7
0
 def __init__(self, session: Session):
     """Create the data-access providers, all bound to *session*."""
     self.file_provider = FileProvider(session)
     self.provider = DagStorageProvider(session)
     self.task_provider = TaskProvider(session)
     self.library_provider = DagLibraryProvider(session)
Esempio n. 8
0
class Storage:
    """Stores and retrieves DAG code files in the database.

    Files are deduplicated by MD5 per project; the directory layout is
    recorded in DagStorage rows so a task's code tree can be rebuilt on
    any worker.
    """

    def __init__(self, session: Session):
        """Create the data-access providers, all bound to *session*."""
        self.file_provider = FileProvider(session)
        self.provider = DagStorageProvider(session)
        self.task_provider = TaskProvider(session)
        self.library_provider = DagLibraryProvider(session)

    def copy_from(self, src: int, dag: Dag):
        """Copy storage entries and library records from DAG *src* to *dag*.

        File contents are shared (only DagStorage/DagLibrary rows are new).
        """
        storages = self.provider.query(DagStorage). \
            filter(DagStorage.dag == src). \
            all()
        libraries = self.library_provider.query(DagLibrary). \
            filter(DagLibrary.dag == src). \
            all()

        s_news = []
        for s in storages:
            s_new = DagStorage(dag=dag.id,
                               file=s.file,
                               path=s.path,
                               is_dir=s.is_dir)
            s_news.append(s_new)
        l_news = []
        for l in libraries:
            l_new = DagLibrary(dag=dag.id,
                               library=l.library,
                               version=l.version)
            l_news.append(l_new)

        self.provider.add_all(s_news)
        self.library_provider.add_all(l_news)

    def _build_spec(self, folder: str):
        """Build a gitignore-style PathSpec from *folder*/file.ignore.txt
        plus always-ignored patterns (logs, data, models, caches)."""
        ignore_file = os.path.join(folder, 'file.ignore.txt')
        if not os.path.exists(ignore_file):
            ignore_patterns = []
        else:
            ignore_patterns = read_lines(ignore_file)
        ignore_patterns.extend(['log', 'data', 'models', '__pycache__'])

        return pathspec.PathSpec.from_lines(
            pathspec.patterns.GitWildMatchPattern, ignore_patterns)

    def upload(self, folder: str, dag: Dag, control_reqs: bool = True):
        """Upload *folder*'s content for *dag*, deduplicating files by MD5."""
        hashs = self.file_provider.hashs(dag.project)

        files = []
        all_files = []
        spec = self._build_spec(folder)

        for o in glob(os.path.join(folder, '**'), recursive=True):
            path = os.path.relpath(o, folder)
            if spec.match_file(path) or path == '.':
                continue

            if isdir(o):
                self.provider.add(
                    DagStorage(dag=dag.id, path=path, is_dir=True))
                continue
            # FIX: close the file handle deterministically (was a bare
            # open(...).read()).
            with open(o, 'rb') as f:
                content = f.read()
            md5 = hashlib.md5(content).hexdigest()

            all_files.append(o)

            if md5 in hashs:
                file_id = hashs[md5]
            else:
                file = File(md5=md5,
                            content=content,
                            project=dag.project,
                            dag=dag.id,
                            created=now())
                self.file_provider.add(file)
                file_id = file.id
                hashs[md5] = file.id
                files.append(o)

            self.provider.add(
                DagStorage(dag=dag.id, path=path, file=file_id, is_dir=False))

        if INSTALL_DEPENDENCIES and control_reqs:
            reqs = control_requirements(folder, files=all_files)
            for name, rel, version in reqs:
                self.library_provider.add(
                    DagLibrary(dag=dag.id, library=name, version=version))

    def download(self, task: int):
        """Download the code of *task*, symlink its data/models folders and
        put the folder on sys.path. Returns the task folder path."""
        task = self.task_provider.by_id(
            task, joinedload(Task.dag_rel, innerjoin=True))
        folder = join(TASK_FOLDER, str(task.id))
        os.makedirs(folder, exist_ok=True)
        items = self.provider.by_dag(task.dag)
        # Directories (file is None) first, so parent dirs exist.
        items = sorted(items, key=lambda x: x[1] is not None)
        for item, file in items:
            path = os.path.join(folder, item.path)
            if item.is_dir:
                os.makedirs(path, exist_ok=True)
            else:
                with open(path, 'wb') as f:
                    f.write(file.content)

        config = Config.from_yaml(task.dag_rel.config)
        info = config['info']
        try:
            data_folder = os.path.join(DATA_FOLDER, info['project'])
            os.makedirs(data_folder, exist_ok=True)

            os.symlink(data_folder,
                       os.path.join(folder, 'data'),
                       target_is_directory=True)
        except FileExistsError:
            pass

        try:
            model_folder = os.path.join(MODEL_FOLDER, info['project'])
            os.makedirs(model_folder, exist_ok=True)

            os.symlink(model_folder,
                       os.path.join(folder, 'models'),
                       target_is_directory=True)
        except FileExistsError:
            pass

        sys.path.insert(0, folder)
        return folder

    def import_executor(self,
                        folder: str,
                        base_folder: str,
                        executor: str,
                        libraries: List[Tuple] = None):
        """Find and import the module defining *executor* under *folder*.

        Installs pinned library versions when they differ from what is
        installed. Returns (imported, was_installation).
        """

        sys.path.insert(0, base_folder)

        spec = self._build_spec(folder)
        was_installation = False

        folders = [
            p for p in glob(f'{folder}/*', recursive=True)
            if os.path.isdir(p) and not spec.match_file(p)
        ]
        folders += [folder]
        library_names = set(n for n, v in (libraries or []))
        library_versions = {n: v for n, v in (libraries or [])}

        for n in library_names:
            try:
                version = pkg_resources.get_distribution(n).version
                need_install = library_versions[n] != version
            except Exception:
                # Not installed (or metadata unreadable): install it.
                need_install = True

            if INSTALL_DEPENDENCIES and need_install:
                os.system(f'pip install {n}=={library_versions[n]}')
                was_installation = True

        def is_valid_class(cls: pyclbr.Class):
            # Must subclass Executor and match the requested name in
            # exact, lowercase or snake_case form.
            super_names = get_super_names(cls)
            if 'Executor' not in super_names:
                return False

            return cls.name == executor or \
                cls.name.lower() == executor or \
                to_snake(cls.name) == executor

        def relative_name(path: str):
            # File path -> dotted module path relative to base_folder.
            rel = os.path.relpath(path, base_folder)
            parts = [str(p).split('.')[0] for p in rel.split(os.sep)]
            return '.'.join(parts)

        for (module_loader, module_name,
             ispkg) in pkgutil.iter_modules(folders):
            module = module_loader.find_module(module_name)
            module_folder = dirname(module.path)
            # FIX: one unparsable module previously aborted the whole
            # search; skip it and keep scanning (matches the behavior of
            # the other Storage implementation).
            try:
                classes = pyclbr.readmodule(module_name,
                                            path=[module_folder])
            except Exception:
                continue
            for k, v in classes.items():
                if is_valid_class(v):
                    importlib.import_module(relative_name(module.path))

                    return True, was_installation

        return False, was_installation
Esempio n. 9
0
class ExecuteBuilder:
    """Fetches a task by id from the database and executes it on this worker.

    ``build()`` drives the full life cycle: create DB providers and load the
    task with its DAG (``create_base``), validate and update the task status,
    download the task's source code and import its executor class
    (``download``), instantiate the executor (``create_executor``) and run it
    (``execute``).  On failure the task is marked ``Failed``; when ``exit`` is
    True the process terminates via ``os._exit``.
    """

    def __init__(self, id: int, repeat_count: int = 1, exit: bool = True):
        # NOTE(review): `id` and `exit` shadow builtins; kept as-is for
        # interface compatibility with existing callers.
        self.session = Session.create_session(key='ExecuteBuilder')
        self.id = id
        # Remaining number of automatic re-submissions of this task
        # (decremented when the task is re-queued after a dependency install).
        self.repeat_count = repeat_count
        self.logger = create_logger(self.session, 'ExecuteBuilder')
        self.exit = exit

        # All of the following are populated by create_base(); they stay
        # None until build() runs.
        self.provider = None
        self.library_provider = None
        self.storage = None
        self.task = None
        self.dag = None
        self.executor = None
        self.hostname = None
        self.docker_img = None
        self.worker_index = None
        self.queue_personal = None
        self.config = None
        self.executor_type = None

    def info(self, msg: str, step=None):
        """Log an info message attributed to this worker, task and step."""
        self.logger.info(msg, ComponentType.Worker, self.hostname, self.id,
                         step)

    def error(self, msg: str, step=None):
        """Log an error message attributed to this worker, task and step."""
        self.logger.error(msg, ComponentType.Worker, self.hostname, self.id,
                          step)

    def warning(self, msg: str, step=None):
        """Log a warning message attributed to this worker, task and step."""
        self.logger.warning(msg, ComponentType.Worker, self.hostname, self.id,
                            step)

    def debug(self, msg: str, step=None):
        """Log a debug message attributed to this worker, task and step."""
        self.logger.debug(msg, ComponentType.Worker, self.hostname, self.id,
                          step)

    def create_base(self):
        """Load the task (with its DAG eagerly joined) and derive runtime
        settings: hostname, docker image, worker index, the worker's personal
        queue name, the parsed DAG config and the executor type.

        Raises:
            Exception: if no task with ``self.id`` exists.
        """
        self.info('create_base')

        self.provider = TaskProvider(self.session)
        self.library_provider = DagLibraryProvider(self.session)
        self.storage = Storage(self.session)

        # innerjoin guarantees dag_rel is loaded together with the task;
        # check_status() asserts on this later.
        self.task = self.provider.by_id(
            self.id, joinedload(Task.dag_rel, innerjoin=True))
        if not self.task:
            raise Exception(f'task with id = {self.id} is not found')

        self.dag = self.task.dag_rel
        self.executor = None
        self.hostname = socket.gethostname()

        self.docker_img = DOCKER_IMG
        # NOTE(review): default is the int -1 while the env value is a str;
        # only used in the f-string below, so the mix is harmless here.
        self.worker_index = os.getenv('WORKER_INDEX', -1)

        # Queue dedicated to this (host, docker image, worker) triple, used
        # for re-submitting this task back to the same worker.
        self.queue_personal = f'{self.hostname}_{self.docker_img}_' \
                              f'{self.worker_index}'

        self.config = Config.from_yaml(self.dag.config)
        self.executor_type = self.config['executors'][
            self.task.executor]['type']

    def check_status(self):
        """Fail fast if the task has already passed the InProgress state."""
        self.info('check_status')

        assert self.dag is not None, 'You must fetch task with dag_rel'

        if self.task.status > TaskStatus.InProgress.value:
            msg = f'Task = {self.task.id}. Status = {self.task.status}, ' \
                  f'before the execute_by_id invocation'
            self.error(msg)
            raise Exception(msg)

    def change_status(self):
        """Record this worker's identity on the task and mark it InProgress."""
        self.info('change_status')

        self.task.computer_assigned = self.hostname
        self.task.pid = os.getpid()
        self.task.worker_index = self.worker_index
        self.task.docker_assigned = self.docker_img
        self.provider.change_status(self.task, TaskStatus.InProgress)

    def download(self):
        """Fetch the task's code and import its executor class.

        In debug mode the current working directory is used instead of a
        download.  The executor is first looked up among mlcomp's built-in
        executors, then in the task's own folder (installing the DAG's
        declared libraries if needed).  If an installation happened, the
        task is re-queued to this worker's personal queue so it restarts
        with the new dependencies, and this process exits.
        """
        self.info('download')

        if not self.task.debug:
            folder = self.storage.download(task=self.id)
        else:
            folder = os.getcwd()

        os.chdir(folder)

        libraries = self.library_provider.dag(self.task.dag)
        executor_type = self.executor_type

        # Built-in executors live next to this file; the base folder is the
        # package root so relative module names resolve.
        mlcomp_executors_folder = join(dirname(abspath(__file__)), 'executors')
        mlcomp_base_folder = os.path.abspath(
            join(mlcomp_executors_folder, '../../../'))

        imported, was_installation = self.storage.import_executor(
            mlcomp_executors_folder, mlcomp_base_folder, executor_type)

        if not imported:
            imported, was_installation = self.storage.import_executor(
                folder, folder, executor_type, libraries)

            if not imported:
                raise Exception(f'Executor = {executor_type} not found')

        if was_installation and not self.task.debug:
            if self.repeat_count > 0:
                try:
                    # NOTE(review): no exception is active here, so
                    # format_exc() logs 'NoneType: None' — verify intent.
                    self.warning(traceback.format_exc())
                    # Re-run the task with one fewer retry remaining.
                    execute.apply_async((self.id, self.repeat_count - 1),
                                        queue=self.queue_personal)
                except Exception:
                    pass
                finally:
                    # This process must not continue with a stale
                    # environment after installing new libraries.
                    sys.exit()

        assert Executor.is_registered(executor_type), \
            f'Executor {executor_type} was not found'

    def create_executor(self):
        """Instantiate the executor from the DAG config and the task's
        optional additional_info payload."""
        self.info('create_executor')

        additional_info = yaml_load(self.task.additional_info) \
            if self.task.additional_info else dict()
        self.executor = Executor.from_config(executor=self.task.executor,
                                             config=self.config,
                                             additional_info=additional_info,
                                             session=self.session,
                                             logger=self.logger)

    def execute(self):
        """Run the executor and persist its result.

        If the executor reports a multi-stage run ('stage'/'stages' keys in
        the result) and more stages remain, the task is re-queued to this
        worker's personal queue instead of being finished; otherwise the
        step is finished and the task is marked Success.
        """
        self.info('execute start')

        res = self.executor(task=self.task,
                            task_provider=self.provider,
                            dag=self.dag)
        self.info('execute executor finished')

        res = res or {}
        self.task.result = yaml_dump(res)
        self.provider.commit()

        if 'stage' in res and 'stages' in res:
            index = res['stages'].index(res['stage'])
            if index < len(res['stages']) - 1:
                self.executor.info(f'stage = {res["stage"]} done. '
                                   f'Go to the stage = '
                                   f'{res["stages"][index + 1]}')

                # Small pause before re-queueing the next stage —
                # presumably to let the status commit settle; verify.
                time.sleep(3)

                self.executor.info(f'sending {(self.id, self.repeat_count)} '
                                   f'to {self.queue_personal}')

                execute.apply_async((self.id, self.repeat_count),
                                    queue=self.queue_personal)
                return

        self.executor.step.finish()
        self.provider.change_status(self.task, TaskStatus.Success)

        self.info('execute end')

    def build(self):
        """Run the whole pipeline; on error mark the task Failed and re-raise.

        Recreates the DB session if the error was a SQLAlchemy one (the old
        session is unusable after such failures).  Always acknowledges the
        surrounding Celery task and, when ``self.exit`` is set, terminates
        the process with ``os._exit(0)``.
        """
        try:
            self.create_base()

            self.check_status()

            self.change_status()

            self.download()

            self.create_executor()

            self.execute()

        except Exception as e:
            if Session.sqlalchemy_error(e):
                # The session is broken; rebuild it so the failure can
                # still be written to the DB below.
                Session.cleanup(key='ExecuteBuilder')
                self.session = Session.create_session(key='ExecuteBuilder')
                self.logger.session = create_logger(self.session,
                                                    'ExecuteBuilder')

            step = self.executor.step.id if \
                (self.executor and self.executor.step) else None

            self.error(traceback.format_exc(), step)
            # NOTE(review): if create_base() failed before providers were
            # built, self.provider is None and this raises AttributeError,
            # masking the original error — confirm acceptable.
            self.provider.change_status(self.task, TaskStatus.Failed)
            raise e
        finally:
            # Acknowledge the Celery task so it is not re-delivered, even
            # though the process is about to exit.
            if app.current_task:
                app.current_task.update_state(state=states.SUCCESS)
                app.close()

            if self.exit:
                # noinspection PyProtectedMember
                os._exit(0)