コード例 #1
0
ファイル: dag.py プロジェクト: ASRlytics/mlcomp
 def duration(self, t: Task):
     """Return the formatted elapsed time of task ``t``.

     A task with no start time is reported as a zero duration.  A task
     that has started but has no finish time is measured up to ``now()``.
     """
     started = t.started
     if not started:
         return duration_format(0)

     # Fall back to the current time while the task has no finish time.
     end = t.finished or now()
     return duration_format((end - started).total_seconds())
コード例 #2
0
    def get(self, filter: dict, options: PaginatorOptions):
        """Return one page of tasks together with lookup lists.

        Args:
            filter: filter criteria passed to ``self._get_filter``.  When
                it carries a truthy ``'report'`` entry, every returned
                task additionally gets a ``report_full`` flag telling
                whether the task is attached to that report.
            options: pagination settings passed to ``self.paginator``.

        Returns:
            A dict with the keys ``total`` (row count of the filtered
            query), ``data`` (serialized tasks of the requested page),
            ``projects`` and ``dags`` (the 20 rows with the highest ids
            of each), and ``dags_model`` (``Pipe`` dags, one per distinct
            name, highest id first).
        """
        query = self.query(Task, Project.name). \
            join(Dag, Dag.id == Task.dag). \
            join(Project, Project.id == Dag.project). \
            options(joinedload(Task.dag_rel, innerjoin=True))

        query = self._get_filter(query, filter)

        total = query.count()
        paginator = self.paginator(query, options)
        res = []

        for task, project_name in paginator.all():
            if task.dag_rel is None:
                continue

            item = {**self.to_dict(task, rules=('-additional_info', ))}
            item['status'] = to_snake(TaskStatus(item['status']).name)
            item['type'] = to_snake(TaskType(item['type']).name)
            item['dag_rel']['project'] = {
                'id': item['dag_rel']['project'],
                'name': project_name
            }

            if task.started is None:
                delta = 0
            elif task.status == TaskStatus.InProgress.value:
                delta = (now() - task.started).total_seconds()
            else:
                finish = task.finished or task.last_activity
                # A started task may have neither a finish time nor a
                # recorded activity; report zero instead of failing on
                # ``None - datetime``.
                if finish is None:
                    delta = 0
                else:
                    delta = (finish - task.started).total_seconds()
            item['duration'] = duration_format(delta)
            res.append(item)

        if filter.get('report'):
            report_rows = self.query(
                ReportTasks.task
            ).filter(ReportTasks.report == int(filter['report']))
            tasks_within_report = {row[0] for row in report_rows}
            for item in res:
                item['report_full'] = item['id'] in tasks_within_report

        project_rows = self.query(Project.name, Project.id). \
            order_by(Project.id.desc()). \
            limit(20). \
            all()
        dag_rows = self.query(Dag.name, Dag.id). \
            order_by(Dag.id.desc()). \
            limit(20). \
            all()
        projects = [
            {'name': name, 'id': project_id}
            for name, project_id in project_rows
        ]
        dags = [{'name': name, 'id': dag_id} for name, dag_id in dag_rows]

        dags_model = self.query(Dag.name, Dag.id, Dag.project). \
            filter(Dag.type == DagType.Pipe.value). \
            order_by(Dag.id.desc()). \
            all()

        # Rows arrive with the highest id first, so keeping only the first
        # occurrence of a name keeps the highest-id dag for that name.
        dags_model_dict = []
        used_dag_names = set()
        for name, dag_id, project in dags_model:
            if name in used_dag_names:
                continue

            used_dag_names.add(name)
            dags_model_dict.append(
                {'name': name, 'id': dag_id, 'project': project}
            )

        return {
            'total': total,
            'data': res,
            'projects': projects,
            'dags': dags,
            'dags_model': dags_model_dict
        }
コード例 #3
0
ファイル: dag.py プロジェクト: ASRlytics/mlcomp
    def get(self, filter: dict, options: PaginatorOptions = None):
        """Return dags with per-dag task aggregates.

        Args:
            filter: filter criteria passed to ``self._get_filter``.  A
                ``'status'`` mapping keeps only dags having at least one
                task in any of the statuses flagged truthy; a truthy
                ``'report'`` entry adds a ``report_full`` flag to every
                returned dag.
            options: pagination settings passed to ``self.paginator``.
                When falsy, the whole query result is returned.

        Returns:
            A dict with the keys ``total`` (row count of the filtered
            query), ``data`` (serialized dags with task count, timestamps,
            per-status task counts and formatted duration) and
            ``projects`` (the 20 projects with the highest ids).
        """
        # One SUM(CASE ...) aggregate per task status, in enum order.
        status_sums = [
            func.sum(
                case(
                    whens=[(Task.status == e.value, 1)],
                    else_=0
                ).label(e.name)
            ) for e in TaskStatus
        ]

        last_activity_col = func.max(Task.last_activity). \
            label('last_activity')
        funcs = [
            func.count(Task.id).label('task_count'), last_activity_col,
            func.min(Task.started).label('started'),
            func.max(Task.finished).label('finished')
        ]

        query = self.query(Dag, Project.name, *funcs,
                           *status_sums).join(Project)
        query = self._get_filter(query, filter, last_activity_col)

        requested_statuses = filter.get('status', {})
        status_clauses = [
            agg > 0 for agg, e in zip(status_sums, TaskStatus)
            if requested_statuses.get(to_snake(e.name))
        ]
        if status_clauses:
            query = query.having(or_(*status_clauses))

        query = query.join(Task, isouter=True).group_by(Dag.id, Project.name)
        # Do not include service tasks
        query = query.filter(Task.type < TaskType.Service.value)

        total = query.count()
        paginator = self.paginator(query, options) if options else query
        res = []
        rules = ('-tasks.dag_rel', )
        # The per-status counts are indexed by the enum value below.
        # NOTE(review): this assumes TaskStatus values are consecutive
        # integers starting at 0 in declaration order - confirm.
        in_progress = TaskStatus.InProgress.value
        for dag, \
                project_name, \
                task_count, \
                last_activity, \
                started, \
                finished, \
                *status_counts in paginator.all():

            dag_fields = self.to_dict(dag, rules=rules).items()
            # noinspection PyDictCreation
            r = {
                'task_count': task_count,
                'last_activity': last_activity,
                'started': started,
                'finished': finished,
                **{
                    k: v
                    for k, v in dag_fields if k not in ['tasks', 'config']
                }
            }
            r['project'] = {'name': project_name}

            r['task_statuses'] = [
                {
                    'name': to_snake(e.name),
                    'count': count
                } for e, count in zip(TaskStatus, status_counts)
            ]
            for key in ('last_activity', 'started', 'finished'):
                value = r[key]
                r[key] = self.serializer.serialize_datetime(value) \
                    if value else None

            if not started:
                # Without a start time no duration can be computed, even
                # if some tasks are reported as in progress.
                delta = 0
            elif status_counts[in_progress] > 0:
                delta = (now() - started).total_seconds()
            elif sum(status_counts[in_progress:]) == 0 or not last_activity:
                delta = 0
            else:
                delta = (last_activity - started).total_seconds()

            r['duration'] = duration_format(delta)
            res.append(r)

        if filter.get('report'):
            dag_ids = [r['id'] for r in res]
            tasks_dags = self.query(Task.id, Task.dag). \
                filter(Task.type <= TaskType.Train.value). \
                filter(Task.dag.in_(dag_ids)). \
                all()

            report_rows = self.query(ReportTasks.task). \
                filter(ReportTasks.report == int(filter['report']))
            tasks_within_report = {row[0] for row in report_rows}

            # A dag is fully included only if none of its selected tasks
            # is missing from the report.
            dags_not_full_included = {
                dag_id
                for task_id, dag_id in tasks_dags
                if task_id not in tasks_within_report
            }
            for r in res:
                r['report_full'] = r['id'] not in dags_not_full_included

        project_rows = self.query(Project.name, Project.id). \
            order_by(Project.id.desc()). \
            limit(20). \
            all()

        projects = [
            {'name': name, 'id': project_id}
            for name, project_id in project_rows
        ]
        return {'total': total, 'data': res, 'projects': projects}