def download(self, task: int):
    """Prepare a local working folder for a task: fetch its DAG files,
    link the shared data/models folders, and make the folder importable."""
    task = self.task_provider.by_id(
        task, joinedload(Task.dag_rel, innerjoin=True))
    folder = join(TASK_FOLDER, str(task.id))
    self.download_dag(task.dag, folder)

    config = Config.from_yaml(task.dag_rel.config)
    info = config['info']

    try:
        data_folder = os.path.join(DATA_FOLDER, info['project'])
        os.makedirs(data_folder, exist_ok=True)

        os.symlink(data_folder,
                   os.path.join(folder, 'data'),
                   target_is_directory=True)
    except FileExistsError:
        pass

    try:
        model_folder = os.path.join(MODEL_FOLDER, info['project'])
        os.makedirs(model_folder, exist_ok=True)

        os.symlink(model_folder,
                   os.path.join(folder, 'models'),
                   target_is_directory=True)
    except FileExistsError:
        pass

    sys.path.insert(0, folder)
    return folder
def download(self, task: int):
    """Materialize a task folder from files stored in the database,
    then link the shared data/models folders and make it importable."""
    task = self.task_provider.by_id(
        task, joinedload(Task.dag_rel, innerjoin=True))
    folder = join(TASK_FOLDER, str(task.id))
    os.makedirs(folder, exist_ok=True)

    items = self.provider.by_dag(task.dag)
    # Entries without file content sort first, so directories exist
    # before any file is written into them.
    items = sorted(items, key=lambda x: x[1] is not None)

    for item, file in items:
        path = os.path.join(folder, item.path)
        if item.is_dir:
            os.makedirs(path, exist_ok=True)
        else:
            with open(path, 'wb') as f:
                f.write(file.content)

    config = Config.from_yaml(task.dag_rel.config)
    info = config['info']

    try:
        data_folder = os.path.join(DATA_FOLDER, info['project'])
        os.makedirs(data_folder, exist_ok=True)
        os.symlink(data_folder, os.path.join(folder, 'data'))
    except FileExistsError:
        pass

    try:
        model_folder = os.path.join(MODEL_FOLDER, info['project'])
        os.makedirs(model_folder, exist_ok=True)
        os.symlink(model_folder, os.path.join(folder, 'models'))
    except FileExistsError:
        pass

    sys.path.insert(0, folder)
    return folder
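# A minimal, self-contained sketch of the idempotent symlink layout both
# `download` variants rely on. The helper name `link_shared_folder` and the
# folder names are assumptions for the example, not part of the source; it
# assumes a POSIX filesystem where os.symlink is permitted.
import os
import tempfile


def link_shared_folder(shared_root: str, task_folder: str, name: str):
    """Create task_folder/name -> shared_root, tolerating reruns."""
    os.makedirs(shared_root, exist_ok=True)
    try:
        os.symlink(shared_root, os.path.join(task_folder, name),
                   target_is_directory=True)
    except FileExistsError:
        pass  # the task folder was prepared before; keep the existing link


if __name__ == '__main__':
    root = tempfile.mkdtemp()
    task_folder = os.path.join(root, 'task_1')
    os.makedirs(task_folder, exist_ok=True)
    for name in ('data', 'models'):
        link_shared_folder(os.path.join(root, 'shared', name),
                           task_folder, name)
    # Re-linking is safe: the second call hits FileExistsError and no-ops.
    link_shared_folder(os.path.join(root, 'shared', 'data'),
                       task_folder, 'data')
    print(sorted(os.listdir(task_folder)))  # ['data', 'models']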
def create_base(self):
    self.info('create_base')

    self.provider = TaskProvider(self.session)
    self.library_provider = DagLibraryProvider(self.session)
    self.storage = Storage(self.session)

    self.task = self.provider.by_id(
        self.id, joinedload(Task.dag_rel, innerjoin=True))
    if not self.task:
        raise Exception(f'task with id = {self.id} is not found')

    self.dag = self.task.dag_rel
    self.executor = None
    self.hostname = socket.gethostname()

    self.docker_img = DOCKER_IMG
    self.worker_index = os.getenv('WORKER_INDEX', -1)
    self.queue_personal = f'{self.hostname}_{self.docker_img}_' \
                          f'{self.worker_index}'

    self.config = Config.from_yaml(self.dag.config)
    self.executor_type = self.config['executors'][
        self.task.executor]['type']
def create_base(self):
    self.info('create_base')
    # If running inside a celery task, mark it succeeded and revoke it.
    if app.current_task:
        app.current_task.update_state(state=states.SUCCESS)
        app.control.revoke(app.current_task.request.id, terminate=True)

    self.provider = TaskProvider(self.session)
    self.library_provider = DagLibraryProvider(self.session)
    self.storage = Storage(self.session)

    self.task = self.provider.by_id(
        self.id, joinedload(Task.dag_rel, innerjoin=True))
    if not self.task:
        raise Exception(f'task with id = {self.id} is not found')

    self.dag = self.task.dag_rel
    self.executor = None
    self.hostname = socket.gethostname()

    self.docker_img = DOCKER_IMG
    self.worker_index = os.getenv('WORKER_INDEX', -1)
    self.queue_personal = f'{self.hostname}_{self.docker_img}_' \
                          f'{self.worker_index}'

    self.config = Config.from_yaml(self.dag.config)
    set_global_seed(self.config['info'].get('seed', 0))

    self.executor_type = self.config['executors'][
        self.task.executor]['type']

    executor = self.config['executors'][self.task.executor]

    # gpu_assigned holds task-local indices; when CUDA_VISIBLE_DEVICES
    # restricts the worker, translate them back to physical device ids.
    # (The extra gpu_assigned check avoids int('') for tasks without GPUs.)
    if os.getenv('CUDA_VISIBLE_DEVICES', '').strip() != '' \
            and self.task.gpu_assigned:
        cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES',
                                         '').split(',')
        self.task.gpu_assigned = ','.join(
            cuda_visible_devices[int(g)]
            for g in self.task.gpu_assigned.split(','))
        cuda_visible_devices = self.task.gpu_assigned
    else:
        cuda_visible_devices = self.task.gpu_assigned

    cuda_visible_devices = cuda_visible_devices or ''

    env = {
        'MKL_NUM_THREADS': 1,
        'OMP_NUM_THREADS': 1,
        'CUDA_VISIBLE_DEVICES': cuda_visible_devices
    }
    env.update(executor.get('env', {}))

    for k, v in env.items():
        os.environ[k] = str(v)
        self.info(f'Set env. {k} = {v}')
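# The CUDA remapping in `create_base` is the subtle part, so here it is as a
# standalone, testable function (a sketch; `remap_gpus` is an illustrative
# name, not from the source). Task-local GPU indices are positions into the
# worker's CUDA_VISIBLE_DEVICES list; the remap translates them back into
# physical device ids.
def remap_gpus(gpu_assigned: str, cuda_visible_devices: str) -> str:
    """Translate task-local GPU indices into physical device ids."""
    if not cuda_visible_devices.strip() or not gpu_assigned:
        return gpu_assigned or ''
    physical = cuda_visible_devices.split(',')
    return ','.join(physical[int(g)] for g in gpu_assigned.split(','))


assert remap_gpus('0,1', '2,3') == '2,3'  # task devices 0,1 are physical 2,3
assert remap_gpus('1', '5,7') == '7'
assert remap_gpus('', '5,7') == ''        # no GPUs assigned, nothing to map
assert remap_gpus('0,1', '') == '0,1'     # no restriction: ids are physical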
def dag_model_start(session: Session, data: dict):
    """Start a model pipe: merge the chosen equation version into the
    pipe config and launch it as a standard DAG."""
    provider = ModelProvider(session)
    model = provider.by_id(data['model_id'])
    dag_provider = DagProvider(session)
    dag = dag_provider.by_id(data['dag'], joined_load=[Dag.project_rel])
    project = dag.project_rel
    src_config = Config.from_yaml(dag.config)
    pipe = src_config['pipes'][data['pipe']['name']]

    equations = yaml_load(model.equations)
    versions = data['pipe']['versions']

    if len(versions) > 0:
        version = data['pipe']['version']
        pipe_equations = yaml_load(version['equations'])
        found_version = versions[0]
        for v in versions:
            if v['name'] == version['name']:
                found_version = v
                break
        found_version['used'] = now()

        if len(pipe) == 1:
            # A single-executor pipe: apply the equations to that executor.
            pipe[list(pipe)[0]].update(pipe_equations)
        else:
            pipe.update(pipe_equations)

    equations[data['pipe']['name']] = versions
    model.equations = yaml_dump(equations)

    for v in pipe.values():
        v['model_id'] = model.id
        v['model_name'] = model.name

    config = {
        'info': {
            'name': data['pipe']['name'],
            'project': project.name
        },
        'executors': pipe
    }

    if model.dag:
        old_dag = dag_provider.by_id(model.dag)
        if old_dag.name != dag.name:
            model.dag = dag.id
    else:
        model.dag = dag.id

    provider.commit()

    dag_standard(session=session,
                 config=config,
                 debug=False,
                 upload_files=False,
                 copy_files_from=data['dag'])
def model_start_begin(self, model_id: int):
    """Collect the pipe DAGs a model can be started with, most recently
    used pipes first."""
    model = self.by_id(model_id)
    models_dags = self.query(Dag). \
        filter(Dag.type == DagType.Pipe.value). \
        filter(Dag.project == model.project). \
        order_by(Dag.id.desc()). \
        all()

    used_dag_names = set()
    versions = yaml_load(model.equations)

    res_dags = []
    res_dag = None

    for dag in models_dags:
        # Dags are ordered by id desc, so only the newest dag
        # of each name is kept.
        if dag.name in used_dag_names:
            continue

        config = Config.from_yaml(dag.config)
        d = {
            'name': dag.name,
            'id': dag.id,
            'pipes': [{'name': p} for p in config['pipes']]
        }
        for pipe in d['pipes']:
            pipe['versions'] = versions.get(pipe['name'], [])
            used = [
                v.get('used', datetime.datetime.min)
                for v in pipe['versions']
            ]
            pipe['used'] = max(used) if used else datetime.datetime.min

        d['pipes'] = sorted(d['pipes'],
                            key=lambda x: x['used'],
                            reverse=True)

        # 'used' keys were only needed for sorting; strip them
        # from the response.
        for p in d['pipes']:
            del p['used']
            for v in p['versions']:
                if 'used' in v:
                    del v['used']

        used_dag_names.add(dag.name)
        res_dags.append(d)

        if d['id'] == model.dag:
            res_dag = d

    return {'dags': res_dags, 'dag': res_dag, 'model_id': model_id}
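# The "most recently used pipe first" ordering in `model_start_begin`
# reduces to the pattern below, shown on made-up data; `datetime.min`
# stands in for "never used".
import datetime

pipes = [
    {'name': 'train', 'versions': [{'used': datetime.datetime(2020, 1, 2)}]},
    {'name': 'infer', 'versions': []},
    {'name': 'valid', 'versions': [{'used': datetime.datetime(2020, 3, 1)},
                                   {}]},
]
for pipe in pipes:
    used = [v.get('used', datetime.datetime.min) for v in pipe['versions']]
    pipe['used'] = max(used) if used else datetime.datetime.min

pipes.sort(key=lambda x: x['used'], reverse=True)
print([p['name'] for p in pipes])  # ['valid', 'train', 'infer']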
def dag_model_start(session: Session, data: dict):
    """Start a model pipe: inject the model into the matching slot of the
    pipe config and launch it as a standard DAG."""
    provider = ModelProvider(session)
    model = provider.by_id(data['model_id'])
    dag = DagProvider(session).by_id(data['dag'],
                                     joined_load=[Dag.project_rel])

    project = dag.project_rel
    src_config = Config.from_yaml(dag.config)
    pipe = src_config['pipes'][data['pipe']]

    for k, v in pipe.items():
        if v.get('slot') != data['slot']:
            continue
        params = yaml_load(data['interface_params'])
        slot = {
            'interface': data['interface'],
            'interface_params': params,
            'slot': k,
            'name': model.name,
            'id': data['model_id']
        }
        v['slot'] = slot

    config = {
        'info': {
            'name': data['pipe'],
            'project': project.name
        },
        'executors': pipe
    }

    dag_standard(session=session,
                 config=config,
                 debug=False,
                 upload_files=False,
                 copy_files_from=data['dag'])

    model.dag = data['dag']
    model.interface = data['interface']
    model.interface_params = data['interface_params']
    model.slot = data['slot']
    provider.commit()
def create_base(self):
    self.info('create_base')
    if app.current_task:
        app.current_task.update_state(state=states.SUCCESS)
        app.control.revoke(app.current_task.request.id, terminate=True)

    self.provider = TaskProvider(self.session)
    self.library_provider = DagLibraryProvider(self.session)
    self.storage = Storage(self.session)

    self.task = self.provider.by_id(
        self.id, joinedload(Task.dag_rel, innerjoin=True))
    if not self.task:
        raise Exception(f'task with id = {self.id} is not found')

    self.dag = self.task.dag_rel
    self.executor = None
    self.hostname = socket.gethostname()

    self.docker_img = DOCKER_IMG
    self.worker_index = os.getenv('WORKER_INDEX', -1)
    self.queue_personal = f'{self.hostname}_{self.docker_img}_' \
                          f'{self.worker_index}'

    self.config = Config.from_yaml(self.dag.config)
    self.executor_type = self.config['executors'][
        self.task.executor]['type']

    executor = self.config['executors'][self.task.executor]
    env = {'MKL_NUM_THREADS': 1, 'OMP_NUM_THREADS': 1}
    env.update(executor.get('env', {}))
    for k, v in env.items():
        os.environ[k] = str(v)
        self.info(f'Set env. {k} = {v}')
def get(self, filter, options: PaginatorOptions):
    """List models with their projects and the pipe dags that can
    consume them."""
    query = self.query(Model). \
        options(joinedload(Model.dag_rel, innerjoin=True)). \
        options(joinedload(Model.project_rel, innerjoin=True))

    if filter.get('project'):
        query = query.filter(Model.project == filter['project'])
    if filter.get('name'):
        query = query.filter(Model.name.like(f'%{filter["name"]}%'))
    if filter.get('created_min'):
        created_min = parse_time(filter['created_min'])
        query = query.filter(Model.created >= created_min)
    if filter.get('created_max'):
        created_max = parse_time(filter['created_max'])
        query = query.filter(Model.created <= created_max)

    total = query.count()
    paginator = self.paginator(query, options) if options else query

    res = []
    models = paginator.all()
    models_projects = set()
    for model in models:
        row = self.to_dict(model, rules=('-project_rel.class_names', ))
        res.append(row)
        models_projects.add(model.project)

    models_dags = self.query(Dag). \
        filter(Dag.type == DagType.Pipe.value). \
        filter(Dag.project.in_(list(models_projects))). \
        order_by(Dag.id.desc()). \
        all()

    dags_by_project = defaultdict(list)
    used_dag_names = set()
    for dag in models_dags:
        # Keep only the newest dag of each name.
        if dag.name in used_dag_names:
            continue

        config = Config.from_yaml(dag.config)
        slots = []
        for pipe in config['pipes'].values():
            for k, v in pipe.items():
                if 'slot' in v:
                    if v['slot'] not in slots:
                        slots.append(v['slot'])
                elif 'slots' in v:
                    for slot in v['slots']:
                        if slot not in slots:
                            slots.append(slot)

        d = {
            'name': dag.name,
            'id': dag.id,
            'slots': slots,
            'interfaces': list(config['interfaces']),
            'pipes': list(config['pipes'])
        }
        dags_by_project[dag.project].append(d)
        used_dag_names.add(dag.name)

    for row in res:
        row['dags'] = dags_by_project[row['project']]

    projects = self.query(Project.name, Project.id). \
        order_by(Project.id.desc()). \
        limit(20). \
        all()
    projects = [{'name': name, 'id': id} for name, id in projects]

    return {'total': total, 'data': res, 'projects': projects}
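# This `get` (and the task listing below) walks pipe configs to collect
# slots; here is the core of that walk on a hand-written config (a sketch;
# the dict only mirrors the {'pipes': {pipe: {executor: {...}}}} shape the
# code expects).
config = {
    'pipes': {
        'train': {
            'fit': {'slot': 'model'},
            'eval': {'slots': ['model', 'ema_model']},
        },
        'infer': {
            'predict': {'slot': 'model'},
        },
    }
}

slots = []
for pipe in config['pipes'].values():
    for executor in pipe.values():
        if 'slot' in executor:
            if executor['slot'] not in slots:
                slots.append(executor['slot'])
        elif 'slots' in executor:
            for slot in executor['slots']:
                if slot not in slots:
                    slots.append(slot)

print(slots)  # ['model', 'ema_model']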
def get(self, filter: dict, options: PaginatorOptions):
    """List tasks with durations, their projects/dags, and the pipe dags
    available for model starts."""
    query = self.query(Task, Project.name). \
        join(Dag, Dag.id == Task.dag). \
        join(Project, Project.id == Dag.project). \
        options(joinedload(Task.dag_rel, innerjoin=True))

    query = self._get_filter(query, filter)

    total = query.count()
    paginator = self.paginator(query, options)
    res = []

    for p, project_name in paginator.all():
        if p.dag_rel is None:
            continue

        item = {**self.to_dict(p, rules=('-additional_info', ))}
        item['status'] = to_snake(TaskStatus(item['status']).name)
        item['type'] = to_snake(TaskType(item['type']).name)
        item['dag_rel']['project'] = {
            'id': item['dag_rel']['project'],
            'name': project_name
        }

        if p.started is None:
            delta = 0
        elif p.status == TaskStatus.InProgress.value:
            delta = (now() - p.started).total_seconds()
        else:
            finish = p.finished or p.last_activity
            delta = (finish - p.started).total_seconds()
        item['duration'] = duration_format(delta)
        res.append(item)

    if filter.get('report'):
        tasks_within_report = self.query(ReportTasks.task). \
            filter(ReportTasks.report == int(filter['report']))
        tasks_within_report = {t[0] for t in tasks_within_report}
        for r in res:
            r['report_full'] = r['id'] in tasks_within_report

    projects = self.query(Project.name, Project.id). \
        order_by(Project.id.desc()). \
        limit(20). \
        all()
    dags = self.query(Dag.name, Dag.id). \
        order_by(Dag.id.desc()). \
        limit(20). \
        all()
    projects = [{'name': name, 'id': id} for name, id in projects]
    dags = [{'name': name, 'id': id} for name, id in dags]

    dags_model = self.query(Dag.name, Dag.id, Dag.config). \
        filter(Dag.type == DagType.Pipe.value). \
        order_by(Dag.id.desc()). \
        all()

    dags_model_dict = []
    used_dag_names = set()
    for name, id, config in dags_model:
        # Keep only the newest dag of each name.
        if name in used_dag_names:
            continue

        config = Config.from_yaml(config)
        slots = []
        for pipe in config['pipes'].values():
            for k, v in pipe.items():
                if 'slot' in v:
                    slots.append(v['slot'])
                elif 'slots' in v:
                    slots.extend(v['slots'])

        dag = {
            'name': name,
            'id': id,
            'slots': slots,
            'interfaces': [{
                'name': k,
                'params': yaml_dump(v)
            } for k, v in config['interfaces'].items()]
        }
        dags_model_dict.append(dag)
        used_dag_names.add(name)

    return {
        'total': total,
        'data': res,
        'projects': projects,
        'dags': dags,
        'dags_model': dags_model_dict
    }
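# The three-way duration computation in the task `get` above, as a compact
# standalone sketch (`duration_seconds` is an illustrative name; plain
# datetime stands in for the `now` helper the source imports):
import datetime


def duration_seconds(started, finished, last_activity, in_progress,
                     now=datetime.datetime.utcnow):
    if started is None:
        return 0                                  # never started
    if in_progress:
        return (now() - started).total_seconds()  # still running
    finish = finished or last_activity            # best known end time
    return (finish - started).total_seconds()


start = datetime.datetime(2020, 1, 1, 12, 0, 0)
end = datetime.datetime(2020, 1, 1, 12, 5, 30)
assert duration_seconds(None, None, None, False) == 0
assert duration_seconds(start, end, None, False) == 330.0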