コード例 #1
0
def execute(config: str, debug: bool, params):
    """Build the dags for ``config`` and run every task of them in turn.

    Before any dag is created, each task that is still ``InProgress`` for
    this worker index is reported through the logger and switched to
    ``Failed``. Afterwards every task id of every created dag gets all GPU
    indices counted by ``torch.cuda.device_count()`` assigned, is committed
    and is passed to ``execute_by_id`` with ``exit=False``.

    :param config: passed through to ``_dag``
    :param debug: passed through to ``_dag``
    :param params: passed to ``_dag`` as the ``params`` keyword
    """
    check_statuses()

    _create_computer()
    _create_docker()

    logger = create_logger(_session, __name__)

    provider = TaskProvider(_session)
    step_provider = StepProvider(_session)

    # Report and fail every task left InProgress on this worker
    stale_tasks = provider.by_status(
        TaskStatus.InProgress, worker_index=WORKER_INDEX)
    for stale in stale_tasks:
        last_step = step_provider.last_for_task(stale.id)
        message = (
            f'Task Id = {stale.id} was in InProgress state '
            f'when another tasks arrived to the same worker'
        )
        logger.error(
            message, ComponentType.Worker, stale.computer_assigned,
            stale.id, last_step)
        provider.change_status(stale, TaskStatus.Failed)

    # Create the dags and run their tasks one after another
    for dag in _dag(config, debug, params=params):
        for task_ids in dag.values():
            for task_id in task_ids:
                task = provider.by_id(task_id)
                gpu_indices = map(str, range(torch.cuda.device_count()))
                task.gpu_assigned = ','.join(gpu_indices)

                provider.commit()
                execute_by_id(task_id, exit=False)
コード例 #2
0
def execute(config: str, debug: bool):
    """Build the dag for ``config`` and run every task of it in turn.

    Before the dag is created, each task that is still ``InProgress`` for
    this worker index is reported through the logger and switched to
    ``Failed``. Afterwards every task id of the created dag gets the indices
    of all entries returned by ``GPUtil.getGPUs()`` assigned, is committed
    and is passed to ``execute_by_id`` with ``exit=False``.

    :param config: passed through to ``_dag``
    :param debug: passed through to ``_dag``
    """
    _create_computer()

    logger = create_logger(_session, __name__)

    provider = TaskProvider(_session)
    step_provider = StepProvider(_session)

    # Report and fail every task left InProgress on this worker
    stale_tasks = provider.by_status(
        TaskStatus.InProgress, worker_index=WORKER_INDEX)
    for stale in stale_tasks:
        last_step = step_provider.last_for_task(stale.id)
        message = (
            f'Task Id = {stale.id} was in InProgress state '
            f'when another tasks arrived to the same worker'
        )
        logger.error(
            message, ComponentType.Worker, stale.computer_assigned,
            stale.id, last_step)
        provider.change_status(stale, TaskStatus.Failed)

    # Create the dag and run its tasks one after another
    for task_ids in _dag(config, debug).values():
        for task_id in task_ids:
            task = provider.by_id(task_id)
            gpu_indices = (
                str(position)
                for position, _ in enumerate(GPUtil.getGPUs())
            )
            task.gpu_assigned = ','.join(gpu_indices)

            provider.commit()
            execute_by_id(task_id, exit=False)
コード例 #3
0
 def __init__(self, session: Session, logger, logger_db, task: Task,
              task_provider: TaskProvider):
     """Keep the given collaborators and start with no open steps.

     :param session: used to build the log and step providers
     :param logger: stored as ``self.logger``
     :param logger_db: stored as ``self.logger_db``
     :param task: stored as ``self.task``
     :param task_provider: stored as ``self.task_provider``
     """
     # Objects handed in by the caller are kept as they are
     self.logger = logger
     self.logger_db = logger_db
     self.task = task
     self.task_provider = task_provider

     # Providers bound to the same session
     self.log_provider = LogProvider(session)
     self.step_provider = StepProvider(session)

     # No step has been opened yet
     self.children = []
     self.step = None
コード例 #4
0
ファイル: app.py プロジェクト: xang1234/mlcomp
def steps():
    """Return what ``StepProvider.get`` yields for the requested id.

    The id is whatever ``request_data()`` returns; the lookup goes through
    a provider built on ``_read_session``.
    """
    requested_id = request_data()
    return StepProvider(_read_session).get(requested_id)
コード例 #5
0
class StepWrap:
    """Keeps track of the open steps of a task and logs against them.

    ``children`` is used as a stack: a started step is appended at the end
    and steps are finished from the end. ``step`` is the step whose id is
    attached to the records written by the logging helpers.
    """

    def __init__(
            self, session: Session, logger, task: Task,
            task_provider: TaskProvider
    ):
        """Keep the given collaborators and start with no open steps."""
        self.logger = logger
        self.task = task
        self.task_provider = task_provider
        self.log_provider = LogProvider(session)
        self.step_provider = StepProvider(session)
        self.children = []
        self.step = None

    @property
    def id(self):
        """Id of the current step."""
        return self.step.id

    def _owner_task(self):
        """Return the parent of ``self.task`` if it has one, else the task."""
        if self.task.parent:
            return self.task_provider.by_id(self.task.parent)
        return self.task

    def enter(self):
        """Pick up the unfinished steps of the task or open the main step."""
        owner = self._owner_task()
        self.children = self.step_provider.unfinished(owner.id)
        if self.children:
            # Continue from the last unfinished step
            self.step = self.children[-1]
        else:
            self.step = self.start(0, 'main', 0)

    def _finish(self):
        """Close the last open step; does nothing when none is open."""
        if not self.children:
            return
        closed = self.children.pop()
        closed.finished = now()
        self.step_provider.update()
        # The previous open step becomes current; with none left, the step
        # that was just closed stays current
        self.step = self.children[-1] if self.children else closed

        self.debug('End of the step')

    def finish(self):
        """Close every step that is still open."""
        while self.children:
            self._finish()

    def start(self, level: int, name: str = None, index: int = None):
        """Open a new step at ``level`` and make it the current one.

        When ``index`` is not given and the task has a ``current_step``,
        the index is read from the dot-separated ``current_step`` at
        position ``level - 1``. If the current step already has this index
        and level, nothing happens. Otherwise open steps at the same or a
        deeper level are finished first, the new step is stored and the
        task's ``current_step`` is rewritten from the open steps.

        :param level: level of the step to open
        :param name: name of the step; an empty string is stored if falsy
        :param index: index of the step; ``0`` is stored if falsy
        :return: the created step, or ``None`` if the requested step is
            already the current one
        """
        owner = self._owner_task()

        if index is None and owner.current_step:
            saved = owner.current_step.split('.')
            if level <= len(saved):
                index = int(saved[level - 1])

        current = self.step
        if current and index == current.index and current.level == level:
            return

        if current is not None:
            diff = level - current.level
            assert level > 0, 'level must be positive'
            assert diff <= 1, \
                f'Level {level} can not be started after {self.step.level}'

            if diff <= 0:
                # Close the current step and every deeper one above `level`
                for _ in range(1 - diff):
                    self._finish()

        opened = Step(
            level=level,
            name=name or '',
            started=now(),
            task=owner.id,
            index=index or 0
        )
        self.step_provider.add(opened)
        self.children.append(opened)
        self.step = opened

        # The first open step is left out of the stored position
        owner.current_step = '.'.join(
            str(child.index + 1) for child in self.children[1:]
        )
        self.task_provider.commit()

        self.debug('Begin of the step')

        return opened

    def end(self, level: int):
        """Finish the open steps from the current one down to ``level``."""
        diff = level - self.step.level
        assert diff <= 0, 'you can end only the same step or lower'
        remaining = abs(diff) + 1
        while remaining > 0:
            self._finish()
            remaining -= 1

    def _log(self, method: str, message: str):
        """Call the logger method named ``method`` for the current step."""
        emit = getattr(self.logger, method)
        emit(
            message, ComponentType.Worker, self.task.computer_assigned,
            self.task.id, self.step.id
        )

    def debug(self, message: str):
        """Write ``message`` with the logger's ``debug`` method."""
        self._log('debug', message)

    def info(self, message: str):
        """Write ``message`` with the logger's ``info`` method."""
        self._log('info', message)

    def warning(self, message: str):
        """Write ``message`` with the logger's ``warning`` method."""
        self._log('warning', message)

    def error(self, message: str):
        """Write ``message`` with the logger's ``error`` method."""
        self._log('error', message)
コード例 #6
0
ファイル: signals.py プロジェクト: xyuan/mlcomp
def log_before_insert(mapper, connection, target):
    """Update the last activity of the task owning the step of ``target``.

    Nothing is done when ``target.step`` is ``None``. ``mapper`` and
    ``connection`` are not used by this function.
    """
    step_id = target.step
    if step_id is not None:
        owning_step = StepProvider(_session).by_id(step_id)
        TaskProvider(_session).update_last_activity(owning_step.task)