예제 #1
0
    def create_service_task(
            self,
            task: Task,
            gpu_assigned=None,
            distr_info: dict = None,
            resume: dict = None
    ):
        """Build a service task derived from ``task`` and hand it to the provider.

        The new task copies ``name``, ``computer``, ``executor``, ``report``
        and ``dag`` from ``task``, records ``task.id`` as its parent, starts
        in the ``NotRan`` status and is typed as a ``Service`` task.

        :param task: the task the service task is derived from
        :param gpu_assigned: stored as-is in the new task's ``gpu_assigned``
        :param distr_info: when truthy, written under the ``distr_info`` key
            of the new task's serialized additional info
        :param resume: when truthy, written under the ``resume`` key of the
            new task's serialized additional info
        :return: whatever ``self.provider.add`` returns for the new task
        """
        fields = {
            'name': task.name,
            'computer': task.computer,
            'executor': task.executor,
            'status': TaskStatus.NotRan.value,
            'type': TaskType.Service.value,
            'gpu_assigned': gpu_assigned,
            'parent': task.id,
            'report': task.report,
            'dag': task.dag,
        }
        service_task = Task(**fields)
        service_task.additional_info = task.additional_info

        # additional_info is kept serialized, so every extra that is present
        # goes through its own load -> set key -> dump round trip.
        extras = (('distr_info', distr_info), ('resume', resume))
        for key, value in extras:
            if not value:
                continue
            payload = yaml_load(service_task.additional_info)
            payload[key] = value
            service_task.additional_info = yaml_dump(payload)

        return self.provider.add(service_task)
예제 #2
0
파일: standard.py 프로젝트: xyuan/mlcomp
    def create_task(self, k: str, v: dict, name: str, info: dict):
        """Create a task for executor ``k``, store it and return its id.

        :param k: executor name; stored in the task's ``executor`` field
        :param v: executor configuration. ``v['type']`` is required; the keys
            ``task_type``, ``gpu``, ``cpu``, ``memory``, ``steps`` and
            ``report`` are optional
        :param name: task name
        :param info: additional info serialized into the task when a report
            is attached. NOTE: it is mutated, ``info['report_config']`` is set
            in that case
        :return: id of the stored task
        :raises Exception: if ``gpu`` is 0 while the upper gpu bound is
            positive, or if ``self.layout_name`` is not in ``self.layouts``
        """
        task_type = TaskType.User.value
        if v.get('task_type') == 'train' or \
                Executor.is_trainable(v['type']):
            task_type = TaskType.Train.value

        # 'gpu' is either a single count ("2") or a "min-max" range ("1-4")
        gpu = str(v.get('gpu', '0'))
        if '-' not in gpu:
            gpu = int(gpu)
            gpu_max = gpu
        else:
            gpu, gpu_max = map(int, gpu.split('-'))

        if gpu == 0 and gpu_max > 0:
            raise Exception(f'Executor = {k} Gpu_max can"t be>0 when gpu=0')

        task = Task(name=name,
                    executor=k,
                    computer=self.info.get('computer'),
                    gpu=gpu,
                    gpu_max=gpu_max,
                    cpu=v.get('cpu', 1),
                    memory=v.get('memory', 0.1),
                    dag=self.dag.id,
                    debug=self.debug,
                    steps=int(v.get('steps', '1')),
                    type=task_type)

        if self.layout_name and task_type == TaskType.Train.value:
            if self.layout_name not in self.layouts:
                # 'report' may be absent from the executor config; indexing
                # it directly would raise KeyError and hide this error.
                report_name = v.get('report', self.layout_name)
                raise Exception(f'Unknown report = {report_name}')

            report_config = self.layouts[self.layout_name]
            info['report_config'] = report_config

            task.additional_info = yaml_dump(info)
            # The commit is postponed until the report and both report/task
            # links are added; presumably task.id is already populated by
            # this call - confirm against the provider.
            self.provider.add(task, commit=False)
            report = Report(config=yaml_dump(report_config),
                            name=task.name,
                            project=self.project,
                            layout=self.layout_name)
            self.report_provider.add(report)
            task.report = report.id

            # Link the task to its own report and to the dag-level report
            self.report_tasks_provider.add(
                ReportTasks(report=report.id, task=task.id))

            self.report_tasks_provider.add(
                ReportTasks(report=self.dag_report_id, task=task.id))

            self.provider.commit()
        else:
            # NOTE(review): this branch serializes self.additional_info
            # rather than the ``info`` argument - confirm this is intended.
            task.additional_info = yaml_dump(self.additional_info)
            self.provider.add(task)

        return task.id
예제 #3
0
파일: standard.py 프로젝트: shlemph/mlcomp
    def create_task(self,
                    k: str,
                    v: dict,
                    name: str,
                    info: dict,
                    cell: dict = None):
        """Build a task for executor ``k`` and, for train tasks, its report.

        Nothing is passed to a provider here; the objects are only
        constructed and returned.

        :param k: executor name; stored in the task's ``executor`` field
        :param v: executor configuration. It is deep-copied first, so the
            caller's dict is not modified. ``v['type']`` is required; the
            keys ``task_type``, ``gpu``, ``cpu``, ``memory``, ``steps``,
            ``computer`` and ``report`` are optional
        :param name: task name
        :param info: additional info serialized into the task. NOTE: it is
            mutated, ``info['executor']`` is always set and
            ``info['report_config']`` is set when a report is created
        :param cell: optional overrides merged into the copied executor
            configuration before it is stored in ``info``
        :return: ``(task, report)``; ``report`` is ``None`` unless
            ``self.layout_name`` is set and the task is a train task
        :raises Exception: if ``gpu`` is 0 while the upper gpu bound is
            positive, or if ``self.layout_name`` is not in ``self.layouts``
        """
        task_type = TaskType.User.value
        v = deepcopy(v)
        if v.get('task_type') == 'train' or \
                Executor.is_trainable(v['type']):
            task_type = TaskType.Train.value

        # 'gpu' is either a single count ("2") or a "min-max" range ("1-4")
        gpu = str(v.get('gpu', '0'))
        if '-' not in gpu:
            gpu = int(gpu)
            gpu_max = gpu
        else:
            gpu, gpu_max = map(int, gpu.split('-'))

        if gpu == 0 and gpu_max > 0:
            raise Exception(f'Executor = {k} Gpu_max can"t be>0 when gpu=0')

        task = Task(name=name,
                    executor=k,
                    computer=self.info.get('computer') or v.get('computer'),
                    gpu=gpu,
                    gpu_max=gpu_max,
                    cpu=v.get('cpu', 1),
                    memory=v.get('memory', 0.1),
                    dag=self.dag.id,
                    debug=self.debug,
                    steps=int(v.get('steps', '1')),
                    type=task_type)

        # The overrides are applied after the Task is built, so they affect
        # only the configuration stored in additional_info.
        if cell is not None:
            v.update(cell)

        info['executor'] = v
        task.additional_info = yaml_dump(info)
        report = None

        if self.layout_name and task_type == TaskType.Train.value:
            if self.layout_name not in self.layouts:
                # 'report' may be absent from the executor config; indexing
                # it directly would raise KeyError and hide this error.
                report_name = v.get('report', self.layout_name)
                raise Exception(f'Unknown report = {report_name}')

            report_config = self.layouts[self.layout_name]
            info['report_config'] = report_config

            # Serialize again so the stored info includes report_config
            task.additional_info = yaml_dump(info)
            report = Report(config=yaml_dump(report_config),
                            name=task.name,
                            project=self.project,
                            layout=self.layout_name)

        return task, report
예제 #4
0
    def create_tasks(self):
        """Copy the tasks, dependencies and storage rows of ``self.dag``
        into the dag ``self.dag_db``.

        Steps:

        1. Every task of ``self.dag`` without a parent is cloned with status
           ``NotRan`` and attached to ``self.dag_db``.
        2. Task dependencies are recreated with the old task ids mapped to
           the ids of the clones.
        3. If ``self.file_changes`` parses to a dict, every storage row is
           copied; files for which ``self.find_replace`` returns a
           replacement get the replacement applied to their content.

        :raises Exception: if a textual replacement key does not occur in
            the content of a non-``.yml`` file
        """
        tasks = self.task_provider.by_dag(self.dag)
        tasks_new = []
        tasks_old = []

        for t in tasks:
            # Tasks that have a parent are not copied
            if t.parent:
                continue

            task = Task(
                name=t.name,
                status=TaskStatus.NotRan.value,
                computer=t.computer,
                gpu=t.gpu,
                gpu_max=t.gpu_max,
                cpu=t.cpu,
                executor=t.executor,
                memory=t.memory,
                steps=t.steps,
                dag=self.dag_db.id,
                debug=t.debug,
                type=t.type,
            )
            task.additional_info = t.additional_info
            tasks_new.append(task)
            tasks_old.append(t)

        # return_defaults=True: the new ids are needed for the mapping below
        self.task_provider.bulk_save_objects(tasks_new, return_defaults=True)
        old2new = {
            t_old.id: t_new.id
            for t_new, t_old in zip(tasks_new, tasks_old)
        }
        dependencies = self.task_provider.get_dependencies(self.dag)
        dependencies_new = []
        for d in dependencies:
            # NOTE(review): a dependency that references a skipped (child)
            # task would raise KeyError here - confirm that cannot happen.
            d_new = TaskDependence(task_id=old2new[d.task_id],
                                   depend_id=old2new[d.depend_id])
            dependencies_new.append(d_new)

        self.task_provider.bulk_save_objects(dependencies_new,
                                             return_defaults=False)

        changes = yaml_load(self.file_changes)
        storages = self.dag_storage_provider.by_dag(self.dag)
        storages_new = []

        for s, f in storages:
            # NOTE(review): when ``changes`` is not a dict, every storage
            # row is skipped and nothing is copied - confirm this is intended.
            if not isinstance(changes, dict):
                continue

            replace = self.find_replace(changes, s.path)
            if replace is not None and f:
                content = f.content.decode('utf-8')
                if s.path.endswith('.yml'):
                    data = yaml_load(content)
                    data = merge_dicts_smart(data, replace)
                    content = yaml_dump(data)
                else:
                    # NOTE(review): this unpacks (old, new) pairs; if
                    # ``replace`` is a plain dict it would need .items() -
                    # confirm against find_replace.
                    for k, v in replace:
                        if k not in content:
                            raise Exception(f'{k} is not in the content')
                        content = content.replace(k, v)
                content = content.encode('utf-8')
                # The md5 of the content is the lookup key for reusing an
                # already stored file with the same content.
                md5 = hashlib.md5(content).hexdigest()
                f = self.file_provider.by_md5(md5)
                if not f:
                    f = File(content=content,
                             created=now(),
                             project=self.dag_db.project,
                             md5=md5,
                             dag=self.dag_db.id)
                # NOTE(review): add() is also called for a file found by md5
                self.file_provider.add(f)

            # ``f`` may be falsy here (the guard above allows for it), so
            # reading f.id unconditionally would raise AttributeError.
            s_new = DagStorage(dag=self.dag_db.id,
                               file=f.id if f else None,
                               path=s.path,
                               is_dir=s.is_dir)
            storages_new.append(s_new)

        self.dag_storage_provider.bulk_save_objects(storages_new,
                                                    return_defaults=False)