def projects():
    """Return a paginated listing of projects for the current request."""
    payload = request_data()
    pagination = PaginatorOptions(**payload['paginator'])
    return ProjectProvider(_read_session).get(payload, pagination)
def project_edit():
    """Update an existing project's class names and ignore folders."""
    payload = request_data()
    provider = ProjectProvider(_write_session)
    return provider.edit_project(
        payload['name'],
        yaml_load(payload['class_names']),
        yaml_load(payload['ignore_folders']),
    )
def sync(project: str, computer: str, only_from: bool, only_to: bool):
    """Synchronize a project's data/model folders between this computer
    and every other registered computer.

    :param project: name of the project to sync
    :param computer: source computer name; defaults to this host
    :param only_from: if True, only pull from other computers
    :param only_to: if True, only push to other computers
    """
    _create_computer()
    host = computer or socket.gethostname()

    computer_provider = ComputerProvider(_session)
    project_provider = ProjectProvider(_session)

    this_computer = computer_provider.by_name(host)
    all_computers = computer_provider.all()

    p = project_provider.by_name(project)
    assert p, f'Project={project} is not found'

    # Per-folder exclusion lists: data honors the project's ignore list,
    # models is synced in full.
    excluded = [str(item) for item in yaml_load(p.ignore_folders)]
    folders_excluded = [
        [join('data', p.name), excluded],
        [join('models', p.name), []],
    ]

    for other in all_computers:
        if other.name == this_computer.name:
            continue
        if not only_from:
            sync_directed(_session, this_computer, other, folders_excluded)
        if not only_to:
            sync_directed(_session, other, this_computer, folders_excluded)
def sync_manual(self, computer: Computer, provider: ComputerProvider):
    """
    button sync was clicked manually
    """
    # Nothing to do unless the computer's meta carries a pending manual sync.
    if not computer.meta:
        return
    meta = yaml_load(computer.meta)
    if 'manual_sync' not in meta:
        return

    manual_sync = meta['manual_sync']
    project = ProjectProvider(self.session).by_id(manual_sync['project'])
    online_dockers = DockerProvider(self.session).get_online()

    for docker in online_dockers:
        # Skip the target computer itself.
        if docker.computer == computer.name:
            continue
        source = provider.by_name(docker.computer)
        sync_directed(
            self.session,
            target=computer,
            source=source,
            ignore_folders=[[join('models', project.name), []]],
        )

    # Consume the request so it is not processed twice.
    del meta['manual_sync']
    computer.meta = yaml_dump(meta)
    provider.update()
def project_add():
    """Create a new project from the request payload."""
    payload = request_data()
    ProjectProvider(_write_session).add_project(
        payload['name'],
        yaml_load(payload['class_names']),
        yaml_load(payload['ignore_folders']),
    )
def create_providers(self):
    """Instantiate all data-access providers, sharing one session."""
    session = self.session
    self.provider = TaskProvider(session)
    self.report_provider = ReportProvider(session)
    self.report_tasks_provider = ReportTasksProvider(session)
    self.report_layout_provider = ReportLayoutProvider(session)
    self.project_provider = ProjectProvider(session)
    self.storage = Storage(session)
    self.dag_provider = DagProvider(session)
def create_providers(self):
    """Instantiate all providers and storage bound to the current session."""
    self.log_info('create_providers')
    session = self.session
    self.provider = TaskProvider(session)
    self.report_provider = ReportProvider(session)
    self.report_tasks_provider = ReportTasksProvider(session)
    self.report_layout_provider = ReportLayoutProvider(session)
    self.project_provider = ProjectProvider(session)
    self.storage = Storage(session, logger=self.logger,
                           component=self.component)
    self.dag_provider = DagProvider(session)
def sync_manual(self, computer: Computer, provider: ComputerProvider):
    """
    button sync was clicked manually
    """
    # A pending manual sync is stored inside the computer's meta blob.
    if not computer.meta:
        return
    meta = yaml_load(computer.meta)
    if 'manual_sync' not in meta:
        return

    manual_sync = meta['manual_sync']
    project = ProjectProvider(self.session).by_id(manual_sync['project'])
    online_dockers = DockerProvider(self.session).get_online()

    # Normalize folder lists; anything non-list collapses to "nothing".
    sync_folders = correct_folders(manual_sync['sync_folders'], project.name)
    ignore_folders = correct_folders(manual_sync['ignore_folders'],
                                     project.name)
    if not isinstance(sync_folders, list):
        sync_folders = []
    if not isinstance(ignore_folders, list):
        ignore_folders = []

    for docker in online_dockers:
        if docker.computer == computer.name:
            continue
        source = provider.by_name(docker.computer)
        folders = [[s, ignore_folders] for s in sync_folders]

        # Record which computer we are pulling from before starting.
        computer.syncing_computer = source.name
        provider.update()

        try:
            sync_directed(self.session, target=computer, source=source,
                          folders=folders)
        except Exception as e:
            # Best-effort: report the failure and keep syncing from others.
            self.process_error(e)

    # Consume the request so it is not processed twice.
    del meta['manual_sync']
    computer.meta = yaml_dump(meta)
    provider.update()
def dag_pipe(session: Session, config: dict, config_text: str = None):
    """Create a Pipe-type DAG from ``config``, upload the current working
    directory as its code, and repoint same-named models to the new DAG.

    :param session: database session used by all providers
    :param config: parsed DAG config; must contain 'interfaces' and 'pipes'
    :param config_text: raw config text stored on the Dag row
    :raises AssertionError: if required config keys or the project are missing
    """
    assert 'interfaces' in config, 'interfaces missed'
    # Message fixed to match the key actually checked ('pipes').
    assert 'pipes' in config, 'pipes missed'

    info = config['info']
    storage = Storage(session)
    dag_provider = DagProvider(session)
    folder = os.getcwd()

    # Fail with a clear message (as the sync() helpers do) instead of an
    # AttributeError on `.id` when the project does not exist.
    project_row = ProjectProvider(session).by_name(info['project'])
    assert project_row, f'Project={info["project"]} is not found'
    project = project_row.id

    dag = dag_provider.add(
        Dag(
            config=config_text,
            project=project,
            name=info['name'],
            docker_img=info.get('docker_img'),
            type=DagType.Pipe.value
        )
    )
    storage.upload(folder, dag)

    # Change model dags which have the same name
    ModelProvider(session).change_dag(
        project=project, name=info['name'], to=dag.id)
def work(self):
    """Register a new Model row; when a training task is given, trace its
    checkpoint and copy both the traced model and the raw weights into the
    project's model folder."""
    project = ProjectProvider(self.session).by_id(self.project)
    self.info(f'Task = {self.train_task} child_task: {self.child_task}')

    model = Model(created=now(), name=self.name, project=self.project,
                  equations='', fold=self.fold)
    provider = ModelProvider(self.session)

    if self.train_task:
        task_provider = TaskProvider(self.session)
        task = task_provider.by_id(self.train_task)
        model.score_local = task.score

        # The child task's folder takes precedence when present.
        task_dir = join(TASK_FOLDER, str(self.child_task or task.id))
        src_log = f'{task_dir}/log'
        models_dir = join(MODEL_FOLDER, project.name)
        os.makedirs(models_dir, exist_ok=True)

        # Trace from the configured checkpoint file and stage the result
        # inside the log folder before publishing it to models_dir.
        model_path_tmp = f'{src_log}/traced.pth'
        traced = trace_model_from_checkpoint(src_log, self, file=self.file)

        model_path = f'{models_dir}/{model.name}.pth'
        model_weight_path = f'{models_dir}/{model.name}_weight.pth'
        torch.jit.save(traced, model_path_tmp)
        shutil.copy(model_path_tmp, model_path)

        # NOTE(review): self.file is overwritten here, so the raw weights
        # always come from best_full regardless of which file was traced —
        # confirm this is intentional.
        file = self.file = 'best_full'
        shutil.copy(f'{src_log}/checkpoints/{file}.pth', model_weight_path)

    provider.add(model)
def sync(project: str, computer: str, only_from: bool, only_to: bool,
         online: bool):
    """ Syncs specified project on this computer with other computers """
    check_statuses()
    _create_computer()
    _create_docker()

    host = computer or socket.gethostname()
    computer_provider = ComputerProvider(_session)
    project_provider = ProjectProvider(_session)

    this_computer = computer_provider.by_name(host)
    others = computer_provider.all_with_last_activtiy()

    p = project_provider.by_name(project)
    assert p, f'Project={project} is not found'

    # Normalize folder lists; anything non-list collapses to "nothing".
    sync_folders = correct_folders(yaml_load(p.sync_folders), p.name)
    ignore_folders = correct_folders(yaml_load(p.ignore_folders), p.name)
    if not isinstance(sync_folders, list):
        sync_folders = []
    if not isinstance(ignore_folders, list):
        ignore_folders = []

    folders = [[s, ignore_folders] for s in sync_folders]

    for other in others:
        if other.name == this_computer.name:
            continue
        # With --online, skip computers silent for more than 100 seconds.
        if online and (now() - other.last_activity).total_seconds() > 100:
            continue
        if not only_from:
            sync_directed(_session, this_computer, other, folders)
        if not only_to:
            sync_directed(_session, other, this_computer, folders)
def work(self):
    """Register a new Model row; when a training task is given, locate the
    catalyst log directory from the task's DAG config, trace the
    checkpoint, and copy the traced model and raw weights into the
    project's model folder."""
    project = ProjectProvider(self.session).by_id(self.project)
    self.info(f'Task = {self.train_task} child_task: {self.child_task}')

    model = Model(
        created=now(),
        name=self.name,
        project=self.project,
        equations='',
        fold=self.fold
    )
    provider = ModelProvider(self.session)

    if self.train_task:
        task_provider = TaskProvider(self.session)
        dag_provider = DagProvider(self.session)
        task = task_provider.by_id(self.train_task)
        dag = dag_provider.by_id(task.dag)
        # The child task's folder takes precedence when present.
        task_dir = join(TASK_FOLDER, str(self.child_task or task.id))

        # get log directory: resolve the executor's catalyst config within
        # the task folder and read its configured logdir.
        config = yaml_load(dag.config)
        executor_config = config['executors'][task.executor]
        catalyst_config_file = executor_config['args']['config']
        catalyst_config_file = join(task_dir, catalyst_config_file)
        catalyst_config = yaml_load(file=catalyst_config_file)
        catalyst_logdir = catalyst_config['args']['logdir']

        model.score_local = task.score

        src_log = f'{task_dir}/{catalyst_logdir}'
        models_dir = join(MODEL_FOLDER, project.name)
        os.makedirs(models_dir, exist_ok=True)

        # Trace from the configured checkpoint file and stage the result
        # inside the log folder before publishing it to models_dir.
        model_path_tmp = f'{src_log}/traced.pth'
        traced = trace_model_from_checkpoint(src_log, self, file=self.file)

        model_path = f'{models_dir}/{model.name}.pth'
        model_weight_path = f'{models_dir}/{model.name}_weight.pth'
        torch.jit.save(traced, model_path_tmp)
        shutil.copy(model_path_tmp, model_path)

        # NOTE(review): self.file is overwritten here, so the raw weights
        # always come from best_full regardless of which file was traced —
        # confirm this is intentional.
        file = self.file = 'best_full'
        shutil.copy(f'{src_log}/checkpoints/{file}.pth', model_weight_path)

    provider.add(model)
def dag_model_add(session: Session, data: dict):
    """Add a model directly, or launch a model_add DAG built from a task."""
    if not data.get('task'):
        # No source task: just register the model row and stop.
        ModelProvider(session).add(
            Model(name=data['name'], project=data['project'],
                  equations=data['equations'], created=now()))
        return

    task_provider = TaskProvider(session)
    task = task_provider.by_id(
        data['task'], options=joinedload(Task.dag_rel, innerjoin=True))
    child_tasks = task_provider.children(task.id)

    # Prefer the first child task's id/computer when children exist.
    computer = task.computer_assigned
    child_task = None
    if child_tasks:
        child_task = child_tasks[0].id
        computer = child_tasks[0].computer_assigned

    project = ProjectProvider(session).by_id(task.dag_rel.project)
    config = {
        'info': {
            'name': 'model_add',
            'project': project.name,
            'computer': computer
        },
        'executors': {
            'model_add': {
                'type': 'model_add',
                'project': data['project'],
                'task': data.get('task'),
                'name': data['name'],
                'file': data['file'],
                'child_task': child_task,
                'fold': data['fold']
            }
        }
    }
    dag_standard(session=session, config=config, debug=False,
                 upload_files=False)
def dag_model_add(session: Session, data: dict):
    """Launch a model_add DAG for the given task and interface."""
    task_provider = TaskProvider(session)
    task = task_provider.by_id(
        data['task'], options=joinedload(Task.dag_rel, innerjoin=True))
    child_tasks = task_provider.children(task.id)

    # Prefer the first child task's id/computer when children exist.
    computer = task.computer_assigned
    child_task = None
    if child_tasks:
        child_task = child_tasks[0].id
        computer = child_tasks[0].computer_assigned

    project = ProjectProvider(session).by_id(task.dag_rel.project)
    interface_params = yaml_load(data.get('interface_params', ''))

    config = {
        'info': {
            'name': 'model_add',
            'project': project.name,
            'computer': computer
        },
        'executors': {
            'model_add': {
                'type': 'model_add',
                'dag': data['dag'],
                'slot': data['slot'],
                'interface': data['interface'],
                'task': data.get('task'),
                'name': data['name'],
                'interface_params': interface_params,
                'child_task': child_task
            }
        }
    }
    dag_standard(session=session, config=config, debug=False,
                 upload_files=False)
def report_add_start():
    """Return the data backing the 'add report' form: projects and layouts."""
    projects = ProjectProvider(_read_session).get()['data']
    layouts = ReportLayoutProvider(_read_session).get()['data']
    return {'projects': projects, 'layouts': layouts}
class DagStandardBuilder:
    """Builds a Standard-type DAG from a parsed config: creates the project
    (if needed), reports, the Dag row, uploads code, and creates tasks with
    their dependencies.

    Entry point is :meth:`build`; the other methods run in a fixed order.
    """

    def __init__(self, session: Session, config: dict, debug: bool,
                 config_text: str = None, upload_files: bool = True,
                 copy_files_from: int = None, config_path: str = None,
                 control_reqs: bool = True, logger=None,
                 component: ComponentType = None):
        self.session = session
        self.config = config
        self.debug = debug
        self.config_text = config_text
        self.upload_files = upload_files
        self.copy_files_from = copy_files_from
        self.config_path = config_path
        self.control_reqs = control_reqs

        self.info = config['info']
        self.layout_name = self.info.get('layout')

        # Providers are created lazily in create_providers().
        self.provider = None
        self.report_provider = None
        self.report_tasks_provider = None
        self.report_layout_provider = None
        self.storage = None
        self.dag_provider = None
        self.logger = logger
        self.component = component

        # Filled in during build().
        self.project = None
        self.layouts = None
        self.dag = None
        self.dag_report_id = None
        self.created = None
        self.project_provider = None

    def log_info(self, message: str):
        """Log through the optional logger; silently no-op without one."""
        if self.logger:
            self.logger.info(message, self.component)

    def create_providers(self):
        """Instantiate all providers and storage bound to the session."""
        self.log_info('create_providers')

        self.provider = TaskProvider(self.session)
        self.report_provider = ReportProvider(self.session)
        self.report_tasks_provider = ReportTasksProvider(self.session)
        self.report_layout_provider = ReportLayoutProvider(self.session)
        self.project_provider = ProjectProvider(self.session)
        self.storage = Storage(self.session, logger=self.logger,
                               component=self.component)
        self.dag_provider = DagProvider(self.session)

    def load_base(self):
        """Resolve (or create) the project and load all report layouts."""
        self.log_info('load_base')

        project = self.project_provider.by_name(self.info['project'])
        if project is None:
            project = self.project_provider.add_project(self.info['project'])
        self.project = project.id
        self.layouts = self.report_layout_provider.all()

    def create_report(self):
        """Create the DAG-level report when a layout was requested."""
        self.log_info('create_report')

        self.dag_report_id = None
        layout_name = self.layout_name
        if layout_name:
            if layout_name not in self.layouts:
                raise Exception(f'Unknown layout = {layout_name}')
            report = Report(config=yaml_dump(self.layouts[layout_name]),
                            name=self.info['name'],
                            project=self.project,
                            layout=layout_name)
            self.report_provider.add(report)
            self.dag_report_id = report.id

    def create_dag(self):
        """Persist the Dag row for this build."""
        self.log_info('create_dag')

        dag = Dag(config=self.config_text or yaml_dump(self.config),
                  project=self.project,
                  name=self.info['name'],
                  docker_img=self.info.get('docker_img'),
                  type=DagType.Standard.value,
                  created=now(),
                  report=self.dag_report_id)
        self.dag = self.dag_provider.add(dag)

    def upload(self):
        """Upload the experiment folder, or copy files from another DAG."""
        self.log_info('upload')

        if self.upload_files:
            folder = os.path.dirname(os.path.abspath(self.config_path))
            if 'expdir' in self.config['info']:
                path = os.path.dirname(os.path.abspath(self.config_path))
                folder = os.path.abspath(
                    os.path.join(path, self.config['info']['expdir']))
            self.storage.upload(folder, self.dag,
                                control_reqs=self.control_reqs)
        elif self.copy_files_from:
            self.storage.copy_from(self.copy_files_from, self.dag)

    def create_task(self, k: str, v: dict, name: str, info: dict):
        """Build one Task (and optionally its Report) for executor ``k``.

        :param k: executor key in the config
        :param v: executor config dict
        :param name: task display name
        :param info: additional info dict stored on the task
        :return: (task, report) — report is None unless this is a train
            task with a layout
        """
        task_type = TaskType.User.value
        if v.get('task_type') == 'train' or \
                Executor.is_trainable(v['type']):
            task_type = TaskType.Train.value

        # gpu may be a single count ("2") or a range ("1-4").
        gpu = str(v.get('gpu', '0'))
        if '-' not in gpu:
            gpu = int(gpu)
            gpu_max = gpu
        else:
            gpu, gpu_max = map(int, gpu.split('-'))
        if gpu == 0 and gpu_max > 0:
            raise Exception(f'Executor = {k} Gpu_max can"t be>0 when gpu=0')

        task = Task(name=name,
                    executor=k,
                    computer=self.info.get('computer') or v.get('computer'),
                    gpu=gpu,
                    gpu_max=gpu_max,
                    cpu=v.get('cpu', 1),
                    memory=v.get('memory', 0.1),
                    dag=self.dag.id,
                    debug=self.debug,
                    steps=int(v.get('steps', '1')),
                    type=task_type)
        task.additional_info = yaml_dump(info)

        report = None
        if self.layout_name and task_type == TaskType.Train.value:
            if self.layout_name not in self.layouts:
                # BUG FIX: previously formatted v["report"], which may not
                # exist and would raise KeyError instead of this message.
                raise Exception(f'Unknown layout = {self.layout_name}')

            report_config = self.layouts[self.layout_name]
            info['report_config'] = report_config
            task.additional_info = yaml_dump(info)
            report = Report(config=yaml_dump(report_config),
                            name=task.name,
                            project=self.project,
                            layout=self.layout_name)
        return task, report

    def create_tasks(self):
        """Create all tasks in dependency order, plus reports and links."""
        self.log_info('create_tasks')

        created = OrderedDict()
        executors = self.config['executors']

        tasks = []
        dependencies = []
        reports = []

        # Repeatedly sweep executors, creating each once all of its
        # dependencies exist.
        # NOTE(review): a dependency cycle across 2+ executors would loop
        # forever here — only self-dependency is detected below.
        while len(created) < len(executors):
            for k, v in executors.items():
                valid = True
                if 'depends' in v:
                    depends = v['depends']
                    if not isinstance(depends, list):
                        depends = [depends]
                    for d in depends:
                        if d == k:
                            raise Exception(
                                f'Executor {k} depends on itself')
                        if d not in executors:
                            raise Exception(f'Executor {k} depend on {d} '
                                            f'which does not exist')
                        valid = valid and d in created
                if valid:
                    # A 'grid' executor expands to one task per grid cell.
                    names = []
                    infos = []
                    if 'grid' in v:
                        grid = v['grid']
                        cells = grid_cells(grid)
                        for i, (cell, cell_name) in enumerate(cells):
                            names.append(cell_name)
                            infos.append({'grid_cell': i})
                    else:
                        names.append(v.get('name', k))
                        infos.append({})

                    k_tasks = []
                    for name, info in zip(names, infos):
                        task, report = self.create_task(k, v, name=name,
                                                        info=info)
                        tasks.append(task)
                        k_tasks.append(task)
                        reports.append(report)

                        if 'depends' in v:
                            depends = v['depends']
                            if not isinstance(depends, list):
                                depends = [depends]
                            for d in depends:
                                for dd in created[d]:
                                    dependencies.append((task, dd))
                    created[k] = k_tasks

        # Persist reports first so their ids can be attached to tasks.
        not_empty_reports = [r for r in reports if r is not None]
        if len(not_empty_reports) > 0:
            self.provider.bulk_save_objects(not_empty_reports,
                                            return_defaults=True)
            for report, task in zip(reports, tasks):
                if report is not None:
                    task.report = report.id

        self.provider.bulk_save_objects(tasks, return_defaults=True)

        if len(not_empty_reports) > 0:
            report_tasks = []
            for report, task in zip(reports, tasks):
                if report is not None:
                    report_tasks.append(
                        ReportTasks(report=report.id, task=task.id))
            self.report_tasks_provider.bulk_save_objects(report_tasks)

        dependencies = [
            TaskDependence(task_id=task.id, depend_id=dd.id)
            for task, dd in dependencies
        ]
        self.provider.bulk_save_objects(dependencies)

        # Expose only the created task ids to the caller.
        for k, v in created.items():
            created[k] = [vv.id for vv in v]
        self.created = created

    def build(self):
        """Run the full build pipeline and return created task ids by
        executor name."""
        self.create_providers()
        self.load_base()
        self.create_report()
        self.create_dag()
        self.upload()
        self.create_tasks()

        self.log_info('Done')
        return self.created
def _configure(self, session):
    """Return a ProjectProvider with a single 'test' project pre-added."""
    provider = ProjectProvider(session)
    provider.add_project(name='test')
    return provider
def test_add(self, session: Session):
    """A freshly added project must be retrievable by its id."""
    provider = ProjectProvider(session)
    created = provider.add_project(name='test')
    assert provider.by_id(created.id)
def project_remove():
    """Delete the project identified by 'id' in the request payload."""
    # Renamed local: the original `id` shadowed the builtin.
    project_id = request_data()['id']
    ProjectProvider(_write_session).remove(project_id)