Exemple #1
0
    def create_tasks(self):
        """Copy the tasks, dependencies and storage of ``self.dag``.

        Every task of the source DAG that has no parent is copied as a new
        ``Task`` in ``NotRan`` status attached to ``self.dag_db``. The
        dependencies returned by the task provider are then re-created
        between the copies. Finally each storage entry is copied, with the
        replacements parsed from ``self.file_changes`` applied to the file
        content.

        Raises:
            KeyError: if a dependency references a task that was not copied
                (for example a task that has a parent).
            Exception: if a textual replacement key is missing from the
                content of the file it targets.
        """
        tasks_new = []
        tasks_old = []

        for t in self.task_provider.by_dag(self.dag):
            # Tasks that have a parent are not copied.
            if t.parent:
                continue

            task = Task(
                name=t.name,
                status=TaskStatus.NotRan.value,
                computer=t.computer,
                gpu=t.gpu,
                gpu_max=t.gpu_max,
                cpu=t.cpu,
                executor=t.executor,
                memory=t.memory,
                steps=t.steps,
                dag=self.dag_db.id,
                debug=t.debug,
                type=t.type,
            )
            task.additional_info = t.additional_info
            tasks_new.append(task)
            tasks_old.append(t)

        # The ids of the new tasks are read right below, so the defaults
        # generated on insert have to be fetched back.
        self.task_provider.bulk_save_objects(tasks_new, return_defaults=True)
        old2new = {
            t_old.id: t_new.id
            for t_old, t_new in zip(tasks_old, tasks_new)
        }

        dependencies_new = [
            TaskDependence(task_id=old2new[d.task_id],
                           depend_id=old2new[d.depend_id])
            for d in self.task_provider.get_dependencies(self.dag)
        ]
        self.task_provider.bulk_save_objects(dependencies_new,
                                             return_defaults=False)

        changes = yaml_load(self.file_changes)
        # NOTE(review): when the parsed changes are not a dict, no storage
        # entry is copied at all. Kept as in the original - confirm that
        # this is intended.
        has_changes = isinstance(changes, dict)
        storages_new = []

        for s, f in self.dag_storage_provider.by_dag(self.dag):
            if not has_changes:
                continue

            replace = self.find_replace(changes, s.path)
            if replace is not None and f:
                content = f.content.decode('utf-8')
                if s.path.endswith('.yml'):
                    # YAML files are merged structurally.
                    data = yaml_load(content)
                    data = merge_dicts_smart(data, replace)
                    content = yaml_dump(data)
                else:
                    # NOTE(review): assumes find_replace yields (old, new)
                    # pairs for non-YAML files - confirm.
                    for k, v in replace:
                        if k not in content:
                            raise Exception(f'{k} is not in the content')
                        content = content.replace(k, v)
                content = content.encode('utf-8')

                # md5 is only used to look up a file with identical content.
                md5 = hashlib.md5(content).hexdigest()
                f = self.file_provider.by_md5(md5)
                if not f:
                    f = File(content=content,
                             created=now(),
                             project=self.dag_db.project,
                             md5=md5,
                             dag=self.dag_db.id)
                self.file_provider.add(f)

            # A storage entry may have no file attached (the guard above
            # already allows for that), so f.id must not be read blindly.
            s_new = DagStorage(dag=self.dag_db.id,
                               file=f.id if f else None,
                               path=s.path,
                               is_dir=s.is_dir)
            storages_new.append(s_new)

        self.dag_storage_provider.bulk_save_objects(storages_new,
                                                    return_defaults=False)
Exemple #2
0
 def add_dependency(self, task_id: int, depend_id: int) -> None:
     """Add a ``TaskDependence`` linking ``task_id`` to ``depend_id``."""
     dependence = TaskDependence(task_id=task_id, depend_id=depend_id)
     self.add(dependence)
Exemple #3
0
    def create_tasks(self):
        """Create tasks, reports and dependencies for the configured executors.

        Executors are read from ``self.config['executors']``. An executor is
        processed only once every executor it depends on has been processed,
        so that its tasks can be linked to the tasks of its dependencies.
        An executor with a ``grid`` entry produces one task per grid cell;
        any other executor produces a single task.

        Reports are saved first so that their ids can be stored on the
        tasks, then the tasks, the report/task links and the dependencies.
        On return ``self.created`` maps each executor name to the list of
        ids of the tasks created for it.

        Raises:
            Exception: if an executor depends on itself, depends on an
                executor that does not exist, or takes part in a
                dependency cycle.
        """
        self.log_info('create_tasks')

        created = OrderedDict()
        executors = self.config['executors']

        # Normalise and validate the dependency lists once, before any task
        # is created.
        depends_of = {}
        for k, v in executors.items():
            depends = []
            if 'depends' in v:
                depends = v['depends']
                if not isinstance(depends, list):
                    depends = [depends]

            for d in depends:
                if d == k:
                    raise Exception(f'Executor {k} depends on itself')

                if d not in executors:
                    raise Exception(f'Executor {k} depend on {d} '
                                    f'which does not exist')
            depends_of[k] = depends

        tasks = []
        dependencies = []
        reports = []

        while len(created) < len(executors):
            progressed = False
            for k, v in executors.items():
                # Without this guard an executor handled in an earlier pass
                # would be created again on every later pass.
                if k in created:
                    continue

                depends = depends_of[k]
                if any(d not in created for d in depends):
                    continue

                if 'grid' in v:
                    named_infos = [
                        (cell_name, {'grid_cell': i})
                        for i, (_, cell_name)
                        in enumerate(grid_cells(v['grid']))
                    ]
                else:
                    named_infos = [(v.get('name', k), {})]

                k_tasks = []
                for name, info in named_infos:
                    task, report = self.create_task(k,
                                                    v,
                                                    name=name,
                                                    info=info)
                    tasks.append(task)
                    k_tasks.append(task)
                    reports.append(report)

                    # Link the task to every task of every dependency.
                    for d in depends:
                        for dd in created[d]:
                            dependencies.append((task, dd))
                created[k] = k_tasks
                progressed = True

            if not progressed:
                # Nothing could be scheduled in a full pass: the remaining
                # executors wait on each other and the loop would never end.
                pending = [k for k in executors if k not in created]
                raise Exception(
                    f'Circular dependency between executors: {pending}')

        not_empty_reports = [r for r in reports if r is not None]
        if not_empty_reports:
            # Reports go first: their ids are stored on the tasks.
            self.provider.bulk_save_objects(not_empty_reports,
                                            return_defaults=True)
            for report, task in zip(reports, tasks):
                if report is not None:
                    task.report = report.id

        self.provider.bulk_save_objects(tasks, return_defaults=True)

        if not_empty_reports:
            report_tasks = [
                ReportTasks(report=report.id, task=task.id)
                for report, task in zip(reports, tasks)
                if report is not None
            ]
            self.report_tasks_provider.bulk_save_objects(report_tasks)

        dependencies = [
            TaskDependence(task_id=task.id, depend_id=dd.id)
            for task, dd in dependencies
        ]
        self.provider.bulk_save_objects(dependencies)

        # Expose ids rather than task objects.
        for k, v in created.items():
            created[k] = [vv.id for vv in v]
        self.created = created