def logs(statuses, folder: str = None):
    """Collect log files and recent database log records for a report.

    When ``folder`` is given, every file found directly under ``LOG_FOLDER``
    is copied into it. Unless ``statuses`` contains a row whose ``status`` is
    ``"ERROR"``, the last 1000 error records and the last 1000 records of the
    supervisor, API and worker-supervisor components are then read through
    ``LogProvider`` and assembled into a table.

    Args:
        statuses: object exposing a pandas-style ``query`` method over a
            ``status`` column (presumably a ``DataFrame`` -- confirm with the
            caller).
        folder: destination directory for the copied files and for
            ``logs_db.csv``. ``None`` skips everything written to disk.

    Returns:
        A ``DataFrame`` with ``status``, ``component``, ``time`` and
        ``message`` columns, or ``None`` when ``statuses`` reports an error.
    """
    if folder is not None:
        for path in glob(join(LOG_FOLDER, '*')):
            shutil.copy(path, join(folder, basename(path)))
        print('logs formed')

    # NOTE(review): database logs are skipped when any status check failed,
    # presumably because the database itself may be unreachable -- confirm.
    if statuses.query('status == "ERROR"').shape[0] > 0:
        return None

    log_provider = LogProvider()
    errors = log_provider.last(count=1000, levels=[LogStatus.Error.value])
    service_components = [
        ComponentType.Supervisor.value,
        ComponentType.API.value,
        ComponentType.WorkerSupervisor.value,
    ]
    services = log_provider.last(count=1000, components=service_components)

    # Each entry is a pair; only its first element (the log record) is used.
    rows = [
        {
            'status': to_snake(LogStatus(record.level).name),
            'component': to_snake(ComponentType(record.component).name),
            'time': record.time,
            'message': record.message,
        }
        for record, _ in errors + services
    ]
    df = pd.DataFrame(rows)

    # ``folder`` defaults to None; join(None, ...) would raise a TypeError,
    # so the CSV is only written when a destination was actually supplied.
    if folder is not None:
        df.to_csv(join(folder, 'logs_db.csv'), index=False)
    return df
def graph(self, id: int):
    """Describe the tasks of a DAG as graph nodes and dependency edges.

    Only tasks of DAG ``id`` whose type is at most ``TaskType.Train`` are
    included. Each edge carries the status of the task it starts from.

    Returns:
        ``{'nodes': [...], 'edges': [...]}`` where nodes hold ``id``,
        ``label``, ``name`` and ``status`` and edges hold ``from``, ``to``
        and ``status``.
    """
    def status_name(task: Task):
        return to_snake(TaskStatus(task.status).name)

    def make_label(task: Task):
        # Executor name first; duration and step progress are only shown
        # once the task has reached at least the in-progress status.
        lines = [task.executor]
        if task.status >= TaskStatus.InProgress.value:
            lines.append(self.duration(task))
            lines.append(f'{task.current_step or ""}/{task.steps or ""}')
        return '\n'.join(lines)

    tasks = (
        self.query(Task)
        .filter(Task.dag == id)
        .filter(Task.type <= TaskType.Train.value)
        .all()
    )

    ids = [task.id for task in tasks]
    dependencies = (
        self.query(TaskDependence)
        .filter(TaskDependence.task_id.in_(ids))
        .all()
    )
    tasks_by_id = {task.id: task for task in tasks}

    nodes = []
    for task in tasks:
        nodes.append({
            'id': task.id,
            'label': make_label(task),
            'name': task.name,
            'status': status_name(task),
        })

    edges = []
    for link in dependencies:
        edges.append({
            'from': link.depend_id,
            'to': link.task_id,
            'status': status_name(tasks_by_id[link.depend_id]),
        })

    return {'nodes': nodes, 'edges': edges}
def describe_logs(dag: int, axis, max_log_text: int = None,
                  log_count: int = 5, col_withds: List[float] = None):
    """Draw the latest log records of a DAG as a table on ``axis``.

    Args:
        dag: DAG identifier passed to ``LogProvider.last``.
        axis: object exposing ``table``, ``set_xticks``, ``axis`` and
            ``set_title`` (presumably a matplotlib ``Axes`` -- confirm).
        max_log_text: when truthy, messages are cut to this many characters.
        log_count: number of records requested from the provider.
        col_withds: column widths; a built-in default is used when falsy.

    Returns:
        The log records whose level maps to ``'error'`` (every level other
        than 10, 20 and 30).
    """
    level_names = {10: 'debug', 20: 'info', 30: 'warning'}
    level_colors = {
        'info': 'lightblue',
        'warning': 'lightyellow',
        'error': 'red',
    }
    headers = ['Component', 'Level', 'Task', 'Time', 'Text']

    records = LogProvider().last(log_count, dag=dag)

    error_logs = []
    cells = []
    cells_colours = []
    for record, task_id in records:
        component = to_snake(ComponentType(record.component).name)
        # Any level that is not debug/info/warning is reported as an error.
        level = level_names.get(record.level, 'error')

        text = record.message
        if max_log_text:
            text = text[:max_log_text]

        cells.append([
            component,
            level,
            str(task_id),
            record.time.strftime('%m.%d %H:%M:%S'),
            text,
        ])
        # Only the "Level" column is tinted; the others stay white.
        cells_colours.append([
            'white',
            level_colors.get(level, 'white'),
            'white',
            'white',
            'white',
        ])

        if level == 'error':
            error_logs.append(record)

    col_withds = col_withds or [0.2, 0.1, 0.25, 0.2, 0.45]
    if cells:
        table = axis.table(
            cellText=cells,
            colLabels=headers,
            cellColours=cells_colours,
            cellLoc='center',
            colWidths=col_withds,
            bbox=[0, 0, 1, 1.0],
            loc='center',
        )
        table.auto_set_font_size(False)
        table.set_fontsize(14)

    axis.set_xticks([])
    axis.axis('off')
    axis.set_title('Logs')

    return error_logs
def is_valid_class(cls: pyclbr.Class):
    """Tell whether ``cls`` is the executor class being looked for.

    The class must list ``'Executor'`` among the names returned by
    ``get_super_names`` and its name must equal ``executor`` either as
    written, lower-cased, or converted by ``to_snake``.
    """
    if 'Executor' in get_super_names(cls):
        class_name = cls.name
        return (
            class_name == executor
            or class_name.lower() == executor
            or to_snake(class_name) == executor
        )
    return False
def get(self, filter: dict, options: PaginatorOptions):
    """Return one page of log records matching ``filter`` and their total.

    Logs are outer-joined with their step and task, so records without
    either are still listed.

    Args:
        filter: optional keys ``message``, ``dag``, ``task`` (also matches
            tasks whose parent is that task), ``components``, ``computer``,
            ``levels``, ``task_name``, ``step_name`` and ``step``. Missing or
            falsy entries are ignored.
        options: pagination settings handed to ``self.paginator``.

    Returns:
        ``{'total': <count of matching rows>, 'data': [<serialized log>]}``.
    """
    query = self.query(Log, Step, Task). \
        join(Step, Step.id == Log.step, isouter=True). \
        join(Task, Task.id == Log.task, isouter=True)

    if filter.get('message'):
        query = query.filter(Log.message.contains(filter['message']))

    if filter.get('dag'):
        query = query.filter(Task.dag == filter['dag'])

    if filter.get('task'):
        # Include logs of the task's direct children as well as its own.
        child_rows = self.query(Task.id). \
            filter(Task.parent == filter['task']). \
            all()
        task_ids = [row[0] for row in child_rows]
        task_ids.append(filter['task'])
        query = query.filter(Task.id.in_(task_ids))

    # Truthiness also covers an explicit None, not just an empty list.
    if filter.get('components'):
        query = query.filter(Log.component.in_(filter['components']))

    if filter.get('computer'):
        # Filter on the log's own column: ``Computer`` is not joined into
        # this query, so comparing ``Computer.name`` would add an implicit
        # cross join instead of restricting the logs.
        # NOTE(review): assumes Log.computer stores the computer name (it is
        # serialized as 'computer' below) -- confirm against the model.
        query = query.filter(Log.computer == filter['computer'])

    if filter.get('levels'):
        query = query.filter(Log.level.in_(filter['levels']))

    if filter.get('task_name'):
        query = query.filter(Task.name.like(f'%{filter["task_name"]}%'))

    if filter.get('step_name'):
        query = query.filter(Step.name.like(f'%{filter["step_name"]}%'))

    if filter.get('step'):
        query = query.filter(Step.id == filter['step'])

    total = query.count()

    data = []
    for log, step, task in self.paginator(query, options):
        data.append({
            'id': log.id,
            'message': log.message.split('\n'),
            'module': log.module,
            'line': log.line,
            'time': self.serializer.serialize_datetime(log.time),
            'level': log_name(log.level),
            'component': to_snake(ComponentType(log.component).name),
            'computer': log.computer,
            'step': self.to_dict(step) if step else None,
            'task': self.to_dict(task, rules=('-additional_info', ))
            if task else None,
        })

    return {'total': total, 'data': data}
def task_stop():
    """Stop the task named in the request, then each of its children.

    The task id is read from the ``id`` field of ``request_data()``. The
    returned status is the one reported for the requested task itself; the
    results of stopping the children are not returned.
    """
    payload = request_data()
    tasks = TaskProvider(_write_session)

    dag_loading = joinedload(Task.dag_rel, innerjoin=True)
    task = tasks.by_id(payload['id'], dag_loading)
    dag = task.dag_rel

    status = celery_tasks.stop(logger, _write_session, task, dag)

    # Children are stopped with the parent's DAG.
    for child in tasks.children(task.id):
        celery_tasks.stop(logger, _write_session, child, dag)

    status_name = to_snake(TaskStatus(status).name)
    return {'status': status_name}
def step_info(self, step):
    """Serialize a step row together with its per-status log counts.

    Args:
        step: a sequence whose first element is the step record and whose
            remaining elements are log counts, paired in order with the
            members of ``LogStatus``.

    Returns:
        A dict with ``id``, ``name``, ``level``, ``duration`` (seconds; a
        step without a finish time is measured up to ``now()``) and
        ``log_statuses``.
    """
    record, *counts = step

    end = record.finished or now()
    elapsed = end - record.started

    log_statuses = []
    for status, count in zip(LogStatus, counts):
        log_statuses.append({'name': to_snake(status.name), 'count': count})

    return {
        'id': record.id,
        'name': record.name,
        'level': record.level,
        'duration': elapsed.total_seconds(),
        'log_statuses': log_statuses,
    }
def register(cls):
    """Record ``cls`` in ``Executor._child`` under three spellings.

    The class is stored under its name as written, its lower-cased name and
    its ``to_snake`` form, then returned unchanged so the function can be
    applied as a decorator.
    """
    class_name = cls.__name__
    for alias in (class_name, class_name.lower(), to_snake(class_name)):
        Executor._child[alias] = cls
    return cls
def get(self, filter: dict, options: PaginatorOptions):
    """Return one page of tasks plus lookup lists for the task view.

    Args:
        filter: filter description passed to ``self._get_filter``; when it
            has a truthy ``report`` entry every returned task is also flagged
            with ``report_full``.
        options: pagination settings handed to ``self.paginator``.

    Returns:
        A dict with ``total``, ``data`` (serialized tasks), ``projects`` and
        ``dags`` (the 20 highest ids of each) and ``dags_model`` (pipe-type
        DAGs, one entry per distinct name, highest id first).
    """
    query = (
        self.query(Task, Project.name)
        .join(Dag, Dag.id == Task.dag)
        .join(Project, Project.id == Dag.project)
        .options(joinedload(Task.dag_rel, innerjoin=True))
    )
    query = self._get_filter(query, filter)

    total = query.count()
    page = self.paginator(query, options)

    data = []
    for task, project_name in page.all():
        if task.dag_rel is None:
            continue

        item = dict(self.to_dict(task, rules=('-additional_info', )))
        item['status'] = to_snake(TaskStatus(item['status']).name)
        item['type'] = to_snake(TaskType(item['type']).name)
        item['dag_rel']['project'] = {
            'id': item['dag_rel']['project'],
            'name': project_name,
        }

        if task.started is None:
            seconds = 0
        else:
            # A running task is measured up to now; otherwise up to its
            # finish time, falling back to its last activity.
            if task.status == TaskStatus.InProgress.value:
                end = now()
            else:
                end = task.finished or task.last_activity
            seconds = (end - task.started).total_seconds()
        item['duration'] = duration_format(seconds)

        data.append(item)

    if filter.get('report'):
        report_rows = self.query(ReportTasks.task).filter(
            ReportTasks.report == int(filter['report'])
        )
        report_task_ids = {row[0] for row in report_rows}
        for item in data:
            item['report_full'] = item['id'] in report_task_ids

    project_rows = (
        self.query(Project.name, Project.id)
        .order_by(Project.id.desc())
        .limit(20)
        .all()
    )
    dag_rows = (
        self.query(Dag.name, Dag.id)
        .order_by(Dag.id.desc())
        .limit(20)
        .all()
    )
    projects = [{'name': name, 'id': id} for name, id in project_rows]
    dags = [{'name': name, 'id': id} for name, id in dag_rows]

    pipe_rows = (
        self.query(Dag.name, Dag.id, Dag.project)
        .filter(Dag.type == DagType.Pipe.value)
        .order_by(Dag.id.desc())
        .all()
    )
    # Rows arrive with the highest id first, so the first row seen for a
    # name is the one that is kept.
    seen_names = set()
    dags_model = []
    for name, dag_id, project in pipe_rows:
        if name not in seen_names:
            seen_names.add(name)
            dags_model.append(
                {'name': name, 'id': dag_id, 'project': project}
            )

    return {
        'total': total,
        'data': data,
        'projects': projects,
        'dags': dags,
        'dags_model': dags_model,
    }
def is_valid_class(cls: pyclbr.Class):
    """Tell whether the name of ``cls`` matches ``executor``.

    The name is compared as written, lower-cased, and converted by
    ``to_snake``; the first match wins.
    """
    class_name = cls.name
    if class_name == executor:
        return True
    if class_name.lower() == executor:
        return True
    return to_snake(class_name) == executor
def get(self, filter: dict, options: PaginatorOptions = None):
    """Return DAGs with aggregated task statistics, optionally paginated.

    Each DAG row is accompanied by its project name, task count, latest
    task activity, earliest start, latest finish and one task count per
    ``TaskStatus`` member. Tasks whose type is ``TaskType.Service`` or
    higher are excluded.

    Args:
        filter: filter description passed to ``self._get_filter``. Its
            optional ``status`` mapping (snake-case status name -> truthy)
            keeps only DAGs having at least one task in a selected status;
            a truthy ``report`` adds a ``report_full`` flag to each DAG.
        options: pagination settings; when falsy the whole result is
            returned.

    Returns:
        ``{'total': ..., 'data': [...], 'projects': [...]}``.
    """
    status_sums = [
        func.sum(
            case(
                whens=[(Task.status == status.value, 1)],
                else_=0
            ).label(status.name)
        )
        for status in TaskStatus
    ]

    last_activity_column = func.max(Task.last_activity).label('last_activity')
    aggregates = [
        func.count(Task.id).label('task_count'),
        last_activity_column,
        func.min(Task.started).label('started'),
        func.max(Task.finished).label('finished'),
    ]

    query = self.query(
        Dag, Project.name, *aggregates, *status_sums
    ).join(Project)
    query = self._get_filter(query, filter, last_activity_column)

    status_clauses = [
        status_sum > 0
        for status_sum, status in zip(status_sums, TaskStatus)
        if filter.get('status', {}).get(to_snake(status.name))
    ]
    if status_clauses:
        query = query.having(or_(*status_clauses))

    query = query.join(Task, isouter=True).group_by(Dag.id, Project.name)
    # Do not include service tasks
    query = query.filter(Task.type < TaskType.Service.value)

    total = query.count()
    rows = self.paginator(query, options) if options else query

    # NOTE(review): the per-status counts are indexed by the enum's integer
    # value below, which relies on TaskStatus values matching their
    # iteration position -- confirm against the enum definition.
    in_progress = TaskStatus.InProgress.value
    rules = ('-tasks.dag_rel', )
    data = []
    for (dag, project_name, task_count, last_activity,
         started, finished, *counts) in rows.all():
        dag_fields = {
            key: value
            for key, value in self.to_dict(dag, rules=rules).items()
            if key not in ['tasks', 'config']
        }
        # The DAG's own fields are unpacked last, so they take precedence
        # over an aggregate that shares the same key.
        item = {
            'task_count': task_count,
            'last_activity': last_activity,
            'started': started,
            'finished': finished,
            **dag_fields
        }
        item['project'] = {'name': project_name}
        item['task_statuses'] = [
            {'name': to_snake(status.name), 'count': count}
            for status, count in zip(TaskStatus, counts)
        ]

        for key in ('last_activity', 'started', 'finished'):
            value = item[key]
            item[key] = self.serializer.serialize_datetime(value) \
                if value else None

        if counts[in_progress] > 0:
            seconds = (now() - started).total_seconds()
        elif sum(counts[in_progress:]) == 0 \
                or not started or not last_activity:
            seconds = 0
        else:
            seconds = (last_activity - started).total_seconds()
        item['duration'] = duration_format(seconds)

        data.append(item)

    if filter.get('report'):
        dag_ids = [item['id'] for item in data]
        task_dag_pairs = (
            self.query(Task.id, Task.dag)
            .filter(Task.type <= TaskType.Train.value)
            .filter(Task.dag.in_(dag_ids))
            .all()
        )
        report_rows = self.query(ReportTasks.task).filter(
            ReportTasks.report == int(filter['report'])
        )
        report_task_ids = {row[0] for row in report_rows}

        # A DAG is "full" only when none of its tasks is missing from the
        # report.
        partially_included = {
            dag_id
            for task_id, dag_id in task_dag_pairs
            if task_id not in report_task_ids
        }
        for item in data:
            item['report_full'] = item['id'] not in partially_included

    project_rows = (
        self.query(Project.name, Project.id)
        .order_by(Project.id.desc())
        .limit(20)
        .all()
    )
    projects = [{'name': name, 'id': id} for name, id in project_rows]

    return {'total': total, 'data': data, 'projects': projects}
def _format_task_duration(seconds):
    """Render a number of seconds as 'H hours M min S sec' style text."""
    if seconds > 3600:
        return f'{int(seconds // 3600)} hours ' \
               f'{int((seconds % 3600) // 60)} min' \
               f' {int(seconds % 60)} sec'
    if seconds > 60:
        return f'{int(seconds // 60)} min' \
               f' {int(seconds % 60)} sec'
    return f'{int(seconds)} sec'


def describe_tasks(dag: int, axis):
    """Draw the tasks of a DAG as a table on ``axis``.

    Args:
        dag: DAG identifier passed to ``TaskProvider.by_dag``.
        axis: object exposing ``table``, ``set_xticks``, ``axis`` and
            ``set_title`` (presumably a matplotlib ``Axes`` -- confirm).

    Returns:
        ``True`` when no task has a status at or below
        ``TaskStatus.InProgress`` (also the case for a DAG without tasks),
        ``False`` otherwise.
    """
    provider = TaskProvider()
    columns = ['Id', 'Started', 'Duration', 'Step', 'Status']
    status_colors = {
        'not_ran': 'gray',
        'queued': 'lightblue',
        'in_progress': 'lime',
        'failed': '#e83217',
        'stopped': '#cb88ea',
        'skipped': 'orange',
        'success': 'green'
    }

    cells = []
    cells_colours = []
    finish = True

    for task in provider.by_dag(dag):
        if task.status <= TaskStatus.InProgress.value:
            finish = False

        started = ''
        duration = ''
        if task.started:
            started = task.started.strftime('%m.%d %H:%M:%S')
            # A task without a finish time is measured up to now.
            end = task.finished if task.finished else now()
            duration = _format_task_duration(
                (end - task.started).total_seconds()
            )

        status = to_snake(TaskStatus(task.status).name)
        cells.append([
            str(task.id), started, duration, task.current_step or '1', status
        ])
        # Only the "Status" column is tinted; the others stay white.
        cells_colours.append(
            ['white', 'white', 'white', 'white', status_colors[status]]
        )

    # The table call indexes the first row of cellText to count columns,
    # so it is skipped when the DAG has no tasks (same guard as
    # describe_logs).
    if cells:
        table = axis.table(
            cellText=cells,
            colLabels=columns,
            cellColours=cells_colours,
            cellLoc='center',
            colWidths=[0.2, 0.3, 0.4, 0.1, 0.2],
            bbox=[0, 0, 1.0, 1.0],
            loc='center'
        )
        table.auto_set_font_size(False)
        table.set_fontsize(14)

    axis.set_xticks([])
    axis.axis('off')
    axis.set_title('Tasks')

    return finish
def register(cls):
    """Record ``cls`` in ``Interface._child`` under three spellings.

    The class is stored under its name as written, its lower-cased name and
    its ``to_snake`` form, then returned unchanged so the function can be
    applied as a decorator.
    """
    class_name = cls.__name__
    aliases = [class_name, class_name.lower(), to_snake(class_name)]
    for alias in aliases:
        Interface._child[alias] = cls
    return cls
def names_snake(cls):
    """Return every name from ``cls.names()`` converted by ``to_snake``."""
    converted = []
    for name in cls.names():
        converted.append(to_snake(name))
    return converted