Beispiel #1
0
def dag_pipe(session: Session, config: dict, config_text: str = None):
    """Register a pipe-type DAG and upload the current working directory.

    A ``Dag`` row of type ``DagType.Pipe`` is created for the project named
    in ``config['info']['project']``, the current working directory is
    uploaded for it through ``Storage.upload``, and
    ``ModelProvider.change_dag`` is then called with the project, the DAG
    name and the new DAG id.

    :param session: database session handed to every provider
    :param config: parsed configuration; must contain the ``interfaces`` and
        ``pipes`` keys plus an ``info`` mapping with ``project`` and ``name``
        (``docker_img`` is optional)
    :param config_text: raw configuration text stored on the DAG row
    :raises AssertionError: if ``interfaces`` or ``pipes`` is missing
    """
    # Raised explicitly rather than via ``assert`` so the validation is not
    # stripped when Python runs with -O. AssertionError is kept so callers
    # observe the same exception type and message as before.
    if 'interfaces' not in config:
        raise AssertionError('interfaces missed')
    if 'pipes' not in config:
        raise AssertionError('pipe missed')

    info = config['info']

    storage = Storage(session)
    dag_provider = DagProvider(session)

    folder = os.getcwd()
    # NOTE(review): by_name() presumably returns None for an unknown project,
    # in which case ``.id`` raises AttributeError - confirm whether the
    # project should be created here like DagStandardBuilder.load_base does.
    project = ProjectProvider(session).by_name(info['project']).id
    dag = dag_provider.add(
        Dag(
            config=config_text,
            project=project,
            name=info['name'],
            docker_img=info.get('docker_img'),
            type=DagType.Pipe.value
        )
    )
    storage.upload(folder, dag)

    # Change model dags which have the same name
    ModelProvider(session).change_dag(
        project=project, name=info['name'], to=dag.id
    )
Beispiel #2
0
class DagCopyBuilder:
    """Create a copy of an existing DAG.

    ``build()`` creates a new ``Dag`` row from the source DAG, copies its
    tasks that have no parent together with their dependencies, and copies
    its storage entries, optionally patching file contents.

    ``file_changes`` is text parsed with ``yaml_load``. When it parses to a
    dict, each key is a regular expression matched against storage paths
    with ``re.match`` and the value of the first matching key describes the
    change: for paths ending in ``.yml`` it is merged into the parsed file
    with ``merge_dicts_smart``; for any other file it is a mapping (or a
    sequence of pairs) of ``substring -> replacement``.
    """

    def __init__(self,
                 session: Session,
                 dag: int,
                 file_changes: str = '',
                 dag_suffix: str = '',
                 logger=None,
                 component: ComponentType = None):
        """
        :param session: database session handed to every provider
        :param dag: id of the DAG to copy
        :param file_changes: YAML text describing per-file changes
        :param dag_suffix: text appended (after a space) to the copied name
        :param logger: optional logger; ``info(message, component)`` is used
        :param component: component passed along with every log message
        """
        self.dag = dag
        self.file_changes = file_changes
        self.session = session
        self.logger = logger
        self.component = component
        self.dag_suffix = dag_suffix

        # The new Dag row; set by create_dag()
        self.dag_db = None

        # Providers are created lazily by create_providers()
        self.dag_provider = None
        self.task_provider = None
        self.file_provider = None
        self.dag_storage_provider = None

    def log_info(self, message: str):
        """Log ``message`` at info level if a logger was supplied."""
        if self.logger:
            self.logger.info(message, self.component)

    def create_providers(self):
        """Instantiate the providers used by the other steps."""
        self.log_info('create_providers')

        self.dag_provider = DagProvider(self.session)
        self.task_provider = TaskProvider(self.session)
        self.file_provider = FileProvider(self.session)
        self.dag_storage_provider = DagStorageProvider(self.session)

    def create_dag(self):
        """Create the new Dag row from the source DAG and keep it in
        ``self.dag_db``. Sizes are reset to 0 and ``created`` is ``now()``.
        """
        dag = self.dag_provider.by_id(self.dag)
        name = dag.name
        if self.dag_suffix:
            name += ' ' + self.dag_suffix
        dag_new = Dag(name=name,
                      created=now(),
                      config=dag.config,
                      project=dag.project,
                      docker_img=dag.docker_img,
                      img_size=0,
                      file_size=0,
                      type=dag.type)
        self.dag_provider.add(dag_new)
        self.dag_db = dag_new

    def find_replace(self, changes: dict, path: str):
        """Return the value of the first key of ``changes`` whose regular
        expression matches ``path`` (``re.match``, i.e. anchored at the
        start), or ``None`` when no key matches.
        """
        for pattern, value in changes.items():
            if re.match(pattern, path):
                return value
        return None

    @staticmethod
    def _replace_text(content: str, replace):
        """Apply ``substring -> replacement`` pairs to ``content``.

        ``replace`` may be a dict or an iterable of 2-item pairs.

        :raises Exception: if a substring is not present in the content
        """
        # Iterating a dict directly yields only its keys, so unpacking
        # ``k, v`` from it fails (or silently splits a 2-character key).
        pairs = replace.items() if isinstance(replace, dict) else replace
        for k, v in pairs:
            if k not in content:
                raise Exception(f'{k} is not in the content')
            content = content.replace(k, v)
        return content

    def _copy_tasks(self):
        """Copy every task of the source DAG that has no parent.

        Copies start with status ``NotRan`` and point to the new DAG.

        :return: mapping ``old task id -> new task id``
        """
        tasks_old = [
            t for t in self.task_provider.by_dag(self.dag) if not t.parent
        ]
        tasks_new = []
        for t in tasks_old:
            task = Task(
                name=t.name,
                status=TaskStatus.NotRan.value,
                computer=t.computer,
                gpu=t.gpu,
                gpu_max=t.gpu_max,
                cpu=t.cpu,
                executor=t.executor,
                memory=t.memory,
                steps=t.steps,
                dag=self.dag_db.id,
                debug=t.debug,
                type=t.type,
            )
            task.additional_info = t.additional_info
            tasks_new.append(task)

        # return_defaults=True is requested because the ids of the new
        # tasks are read right below.
        self.task_provider.bulk_save_objects(tasks_new, return_defaults=True)
        return {
            t_old.id: t_new.id
            for t_new, t_old in zip(tasks_new, tasks_old)
        }

    def _copy_dependencies(self, old2new: dict):
        """Re-create the source DAG's task dependencies between the copies."""
        dependencies_new = [
            TaskDependence(task_id=old2new[d.task_id],
                           depend_id=old2new[d.depend_id])
            for d in self.task_provider.get_dependencies(self.dag)
        ]
        self.task_provider.bulk_save_objects(dependencies_new,
                                             return_defaults=False)

    def _patched_file(self, path: str, f, replace):
        """Return the File holding the content of ``f`` after ``replace``
        has been applied; an existing File with the same md5 is reused.
        """
        content = f.content.decode('utf-8')
        if path.endswith('.yml'):
            data = yaml_load(content)
            data = merge_dicts_smart(data, replace)
            content = yaml_dump(data)
        else:
            content = self._replace_text(content, replace)

        content = content.encode('utf-8')
        md5 = hashlib.md5(content).hexdigest()
        patched = self.file_provider.by_md5(md5)
        if not patched:
            patched = File(content=content,
                           created=now(),
                           project=self.dag_db.project,
                           md5=md5,
                           dag=self.dag_db.id)
        # NOTE(review): add() is also called for a File that by_md5()
        # already found, as the original code did - confirm it is harmless.
        self.file_provider.add(patched)
        return patched

    def _copy_storages(self):
        """Copy the storage entries of the source DAG to the new DAG,
        patching the files selected by ``file_changes``.
        """
        changes = yaml_load(self.file_changes)
        storages = self.dag_storage_provider.by_dag(self.dag)
        storages_new = []

        for s, f in storages:
            # NOTE(review): when ``changes`` is not a dict, no storage entry
            # is copied at all (unchanged from the original) - confirm that
            # yaml_load never returns a non-dict for empty file_changes.
            if not isinstance(changes, dict):
                continue

            replace = self.find_replace(changes, s.path)
            if replace is not None and f:
                f = self._patched_file(s.path, f, replace)

            # ``f`` can be falsy (the check above allows for it), so guard
            # the id access instead of raising AttributeError.
            s_new = DagStorage(dag=self.dag_db.id,
                               file=f.id if f else None,
                               path=s.path,
                               is_dir=s.is_dir)
            storages_new.append(s_new)

        self.dag_storage_provider.bulk_save_objects(storages_new,
                                                    return_defaults=False)

    def create_tasks(self):
        """Copy tasks, then their dependencies, then the storage entries."""
        old2new = self._copy_tasks()
        self._copy_dependencies(old2new)
        self._copy_storages()

    def build(self):
        """Run the whole copy: providers, DAG row, tasks and files."""
        self.create_providers()
        self.create_dag()
        self.create_tasks()
Beispiel #3
0
def dag_tag_add():
    """Attach a tag to a DAG.

    Reads ``dag`` and ``tag`` from the request payload and stores a new
    ``DagTag`` through a ``DagProvider`` bound to the write session.
    """
    payload = request_data()
    DagProvider(_write_session).add(
        DagTag(dag=payload['dag'], tag=payload['tag'])
    )
Beispiel #4
0
class DagStandardBuilder:
    """Build a standard DAG from a parsed configuration.

    ``build()`` resolves (or creates) the project, optionally creates a
    report for the configured layout, creates the ``Dag`` row, uploads or
    copies its files and finally creates one task per executor (or one per
    grid cell) together with the task dependencies.
    """

    def __init__(self,
                 session: Session,
                 config: dict,
                 debug: bool,
                 config_text: str = None,
                 upload_files: bool = True,
                 copy_files_from: int = None,
                 config_path: str = None,
                 control_reqs: bool = True,
                 logger=None,
                 component: ComponentType = None):
        """
        :param session: database session handed to every provider
        :param config: parsed configuration; ``info`` and ``executors``
            sections are read
        :param debug: stored on every created task
        :param config_text: raw configuration text; when empty the dumped
            ``config`` is stored on the DAG instead
        :param upload_files: upload the folder of ``config_path``
        :param copy_files_from: id passed to ``Storage.copy_from`` when
            ``upload_files`` is false
        :param config_path: path of the configuration file
        :param control_reqs: forwarded to ``Storage.upload``
        :param logger: optional logger; ``info(message, component)`` is used
        :param component: component passed along with every log message
        """
        self.session = session
        self.config = config
        self.debug = debug
        self.config_text = config_text
        self.upload_files = upload_files
        self.copy_files_from = copy_files_from
        self.config_path = config_path
        self.control_reqs = control_reqs

        self.info = config['info']
        self.layout_name = self.info.get('layout')

        # Providers are created lazily by create_providers()
        self.provider = None
        self.report_provider = None
        self.report_tasks_provider = None
        self.report_layout_provider = None
        self.storage = None
        self.dag_provider = None
        self.logger = logger
        self.component = component

        # State filled in by the individual build steps
        self.project = None
        self.layouts = None
        self.dag = None
        self.dag_report_id = None
        self.created = None
        self.project_provider = None

    def log_info(self, message: str):
        """Log ``message`` at info level if a logger was supplied."""
        if self.logger:
            self.logger.info(message, self.component)

    def create_providers(self):
        """Instantiate the providers and the storage used by later steps."""
        self.log_info('create_providers')

        self.provider = TaskProvider(self.session)
        self.report_provider = ReportProvider(self.session)
        self.report_tasks_provider = ReportTasksProvider(self.session)
        self.report_layout_provider = ReportLayoutProvider(self.session)
        self.project_provider = ProjectProvider(self.session)

        self.storage = Storage(self.session,
                               logger=self.logger,
                               component=self.component)
        self.dag_provider = DagProvider(self.session)

    def load_base(self):
        """Resolve the project id (creating the project when it does not
        exist) and load all report layouts.
        """
        self.log_info('load_base')

        project = self.project_provider.by_name(self.info['project'])
        if project is None:
            project = self.project_provider.add_project(self.info['project'])

        self.project = project.id
        self.layouts = self.report_layout_provider.all()

    def create_report(self):
        """Create the DAG-level report when a layout is configured.

        :raises Exception: if the configured layout is unknown
        """
        self.log_info('create_report')

        self.dag_report_id = None
        layout_name = self.layout_name
        if layout_name:
            if layout_name not in self.layouts:
                raise Exception(f'Unknown layout = {layout_name}')

            report = Report(config=yaml_dump(self.layouts[layout_name]),
                            name=self.info['name'],
                            project=self.project,
                            layout=layout_name)
            self.report_provider.add(report)
            self.dag_report_id = report.id

    def create_dag(self):
        """Create the Dag row and keep what the provider returns in
        ``self.dag``.
        """
        self.log_info('create_dag')

        dag = Dag(config=self.config_text or yaml_dump(self.config),
                  project=self.project,
                  name=self.info['name'],
                  docker_img=self.info.get('docker_img'),
                  type=DagType.Standard.value,
                  created=now(),
                  report=self.dag_report_id)

        self.dag = self.dag_provider.add(dag)

    def upload(self):
        """Upload the experiment folder, or copy files from another DAG.

        The folder is the directory of ``config_path``; when the config has
        ``info.expdir`` it is resolved relative to that directory.
        """
        self.log_info('upload')

        if self.upload_files:
            folder = os.path.dirname(os.path.abspath(self.config_path))
            if 'expdir' in self.config['info']:
                folder = os.path.abspath(
                    os.path.join(folder, self.config['info']['expdir']))
            self.storage.upload(folder,
                                self.dag,
                                control_reqs=self.control_reqs)
        elif self.copy_files_from:
            self.storage.copy_from(self.copy_files_from, self.dag)

    @staticmethod
    def _parse_gpu(value):
        """Parse a gpu setting into ``(gpu, gpu_max)``.

        ``value`` is either a single number (``2`` -> ``(2, 2)``) or a
        ``'min-max'`` range (``'1-4'`` -> ``(1, 4)``).
        """
        text = str(value)
        if '-' not in text:
            count = int(text)
            return count, count
        low, high = map(int, text.split('-'))
        return low, high

    @staticmethod
    def _depends_of(v: dict):
        """Return the ``depends`` entry of an executor config as a list."""
        if 'depends' not in v:
            return []
        depends = v['depends']
        if not isinstance(depends, list):
            depends = [depends]
        return depends

    @staticmethod
    def _execution_order(executors: dict):
        """Return executor names ordered so each one follows everything it
        depends on.

        Executors are scanned repeatedly in config order; on each pass every
        not-yet-placed executor whose dependencies are all placed is added.

        :raises Exception: if an executor depends on itself, on an unknown
            executor, or if the dependencies form a cycle
        """
        order = []
        placed = set()
        while len(order) < len(executors):
            progressed = False
            for k, v in executors.items():
                # Without this check an executor placed on an earlier pass
                # would be processed (and its tasks created) again.
                if k in placed:
                    continue

                depends = DagStandardBuilder._depends_of(v)
                for d in depends:
                    if d == k:
                        raise Exception(f'Executor {k} depends on itself')

                    if d not in executors:
                        raise Exception(f'Executor {k} depend on {d} '
                                        f'which does not exist')

                if all(d in placed for d in depends):
                    order.append(k)
                    placed.add(k)
                    progressed = True

            # A full pass that places nothing means the remaining executors
            # wait on each other; fail instead of looping forever.
            if not progressed:
                pending = [k for k in executors if k not in placed]
                raise Exception(
                    f'Executors {pending} have cyclic dependencies')
        return order

    def create_task(self, k: str, v: dict, name: str, info: dict):
        """Build (without saving) the Task for executor ``k`` and, for a
        train task with a configured layout, its Report.

        :param k: executor name
        :param v: executor configuration
        :param name: task name
        :param info: additional info dumped into ``task.additional_info``;
            ``report_config`` is added to it when a report is created
        :return: ``(task, report)`` where ``report`` may be ``None``
        :raises Exception: if ``gpu`` is 0 while ``gpu_max`` is positive, or
            the configured layout is unknown
        """
        task_type = TaskType.User.value
        if v.get('task_type') == 'train' or \
                Executor.is_trainable(v['type']):
            task_type = TaskType.Train.value

        gpu, gpu_max = self._parse_gpu(v.get('gpu', '0'))
        if gpu == 0 and gpu_max > 0:
            raise Exception(f'Executor = {k} Gpu_max can"t be>0 when gpu=0')

        task = Task(name=name,
                    executor=k,
                    computer=self.info.get('computer') or v.get('computer'),
                    gpu=gpu,
                    gpu_max=gpu_max,
                    cpu=v.get('cpu', 1),
                    memory=v.get('memory', 0.1),
                    dag=self.dag.id,
                    debug=self.debug,
                    steps=int(v.get('steps', '1')),
                    type=task_type)
        task.additional_info = yaml_dump(info)
        report = None
        if self.layout_name and task_type == TaskType.Train.value:
            if self.layout_name not in self.layouts:
                # The executor config need not have a ``report`` key, so
                # report the layout name that was actually looked up.
                raise Exception(f'Unknown report = {self.layout_name}')

            report_config = self.layouts[self.layout_name]
            info['report_config'] = report_config

            task.additional_info = yaml_dump(info)
            report = Report(config=yaml_dump(report_config),
                            name=task.name,
                            project=self.project,
                            layout=self.layout_name)

        return task, report

    def create_tasks(self):
        """Create and save tasks, their reports and their dependencies.

        One task is created per executor, or one per grid cell when the
        executor has a ``grid``. ``self.created`` ends up mapping each
        executor name to the list of ids of its tasks.
        """
        self.log_info('create_tasks')

        created = OrderedDict()
        executors = self.config['executors']

        tasks = []
        dependencies = []
        reports = []

        for k in self._execution_order(executors):
            v = executors[k]
            depends = self._depends_of(v)

            names = []
            infos = []
            if 'grid' in v:
                cells = grid_cells(v['grid'])
                for i, (cell, cell_name) in enumerate(cells):
                    names.append(cell_name)
                    infos.append({'grid_cell': i})
            else:
                names.append(v.get('name', k))
                infos.append({})

            k_tasks = []
            for name, info in zip(names, infos):
                task, report = self.create_task(k,
                                                v,
                                                name=name,
                                                info=info)
                tasks.append(task)
                k_tasks.append(task)
                reports.append(report)

                # Every task of this executor depends on every task of
                # each executor it lists in ``depends``.
                for d in depends:
                    for dd in created[d]:
                        dependencies.append((task, dd))
            created[k] = k_tasks

        # Reports are saved first so their ids can be stored on the tasks.
        not_empty_reports = [r for r in reports if r is not None]
        if len(not_empty_reports) > 0:
            self.provider.bulk_save_objects(not_empty_reports,
                                            return_defaults=True)
            for report, task in zip(reports, tasks):
                if report is not None:
                    task.report = report.id

        self.provider.bulk_save_objects(tasks, return_defaults=True)

        if len(not_empty_reports) > 0:
            report_tasks = []
            for report, task in zip(reports, tasks):
                if report is not None:
                    report_tasks.append(
                        ReportTasks(report=report.id, task=task.id))
            self.report_tasks_provider.bulk_save_objects(report_tasks)

        dependencies = [
            TaskDependence(task_id=task.id, depend_id=dd.id)
            for task, dd in dependencies
        ]
        self.provider.bulk_save_objects(dependencies)

        for k, v in created.items():
            created[k] = [vv.id for vv in v]
        self.created = created

    def build(self):
        """Run every build step in order.

        :return: ordered mapping ``executor name -> list of task ids``
        """
        self.create_providers()

        self.load_base()

        self.create_report()

        self.create_dag()

        self.upload()

        self.create_tasks()

        self.log_info('Done')

        return self.created