Example #1
0
    def upload(self, folder: str, dag: Dag, control_reqs: bool = True):
        hashs = self.file_provider.hashs(dag.project)

        files = []
        all_files = []
        spec = self._build_spec(folder)

        for o in glob(os.path.join(folder, '**'), recursive=True):
            path = os.path.relpath(o, folder)
            if spec.match_file(path) or path == '.':
                continue

            if isdir(o):
                self.provider.add(
                    DagStorage(dag=dag.id, path=path, is_dir=True)
                )
                continue
            content = open(o, 'rb').read()
            md5 = hashlib.md5(content).hexdigest()

            all_files.append(o)

            if md5 in hashs:
                file_id = hashs[md5]
            else:
                file = File(
                    md5=md5,
                    content=content,
                    project=dag.project,
                    dag=dag.id,
                    created=now()
                )
                self.file_provider.add(file)
                file_id = file.id
                hashs[md5] = file.id
                files.append(o)

            self.provider.add(
                DagStorage(dag=dag.id, path=path, file=file_id, is_dir=False)
            )

        if INSTALL_DEPENDENCIES and control_reqs:
            reqs = control_requirements(folder, files=all_files)
            for name, rel, version in reqs:
                self.library_provider.add(
                    DagLibrary(dag=dag.id, library=name, version=version)
                )
Example #2
0
    def copy_from(self, src: int, dag: Dag):
        storages = self.provider.query(DagStorage). \
            filter(DagStorage.dag == src). \
            all()
        libraries = self.library_provider.query(DagLibrary). \
            filter(DagLibrary.dag == src). \
            all()

        s_news = []
        for s in storages:
            s_new = DagStorage(
                dag=dag.id, file=s.file, path=s.path, is_dir=s.is_dir
            )
            s_news.append(s_new)
        l_news = []
        for l in libraries:
            l_new = DagLibrary(
                dag=dag.id, library=l.library, version=l.version
            )
            l_news.append(l_new)

        self.provider.add_all(s_news)
        self.library_provider.add_all(l_news)
Example #3
0
    def upload(self, folder: str, dag: Dag, control_reqs: bool = True):
        self.log_info('upload started')
        hashs = self.file_provider.hashs(dag.project)
        self.log_info('hashes are retrieved')

        all_files = []
        spec = self._build_spec(folder)

        files = glob(os.path.join(folder, '**'))
        for file in files[:]:
            path = os.path.relpath(file, folder)
            if spec.match_file(path) or path == '.':
                continue
            if os.path.isdir(file):
                child_files = glob(os.path.join(folder, file, '**'),
                                   recursive=True)
                files.extend(child_files)

        if self.max_count and len(files) > self.max_count:
            raise Exception(f'files count = {len(files)} '
                            f'But max count = {self.max_count}')

        self.log_info('list of files formed')

        folders_to_add = []
        files_to_add = []
        files_storage_to_add = []

        total_size_added = 0

        for o in files:
            path = os.path.relpath(o, folder)
            if spec.match_file(path) or path == '.':
                continue

            if isdir(o):
                folder_to_add = DagStorage(dag=dag.id, path=path, is_dir=True)
                folders_to_add.append(folder_to_add)
                continue
            content = open(o, 'rb').read()
            size = sys.getsizeof(content)
            if self.max_file_size and size > self.max_file_size:
                raise Exception(
                    f'file = {o} has size {size}.'
                    f' But max size is set to {self.max_file_size}')
            md5 = hashlib.md5(content).hexdigest()

            all_files.append(o)

            if md5 not in hashs:
                file = File(md5=md5,
                            content=content,
                            project=dag.project,
                            dag=dag.id,
                            created=now())
                hashs[md5] = file
                files_to_add.append(file)
                total_size_added += size

            file_storage = DagStorage(dag=dag.id,
                                      path=path,
                                      file=hashs[md5],
                                      is_dir=False)
            files_storage_to_add.append(file_storage)

        self.log_info('inserting DagStorage folders')

        if len(folders_to_add) > 0:
            self.provider.bulk_save_objects(folders_to_add)

        self.log_info('inserting Files')

        if len(files_to_add) > 0:
            self.file_provider.bulk_save_objects(files_to_add,
                                                 return_defaults=True)

        self.log_info('inserting DagStorage Files')

        if len(files_storage_to_add) > 0:
            for file_storage in files_storage_to_add:
                if isinstance(file_storage.file, File):
                    # noinspection PyUnresolvedReferences
                    file_storage.file = file_storage.file.id

            self.provider.bulk_save_objects(files_storage_to_add)

        dag.file_size += total_size_added

        self.dag_provider.update()

        if INSTALL_DEPENDENCIES and control_reqs:
            reqs = control_requirements(folder, files=all_files)
            for name, rel, version in reqs:
                self.library_provider.add(
                    DagLibrary(dag=dag.id, library=name, version=version))
Example #4
0
    def create_tasks(self):
        tasks = self.task_provider.by_dag(self.dag)
        tasks_new = []
        tasks_old = []

        for t in tasks:
            if t.parent:
                continue

            task = Task(
                name=t.name,
                status=TaskStatus.NotRan.value,
                computer=t.computer,
                gpu=t.gpu,
                gpu_max=t.gpu_max,
                cpu=t.cpu,
                executor=t.executor,
                memory=t.memory,
                steps=t.steps,
                dag=self.dag_db.id,
                debug=t.debug,
                type=t.type,
            )
            task.additional_info = t.additional_info
            tasks_new.append(task)
            tasks_old.append(t)

        self.task_provider.bulk_save_objects(tasks_new, return_defaults=True)
        old2new = {
            t_old.id: t_new.id
            for t_new, t_old in zip(tasks_new, tasks_old)
        }
        dependencies = self.task_provider.get_dependencies(self.dag)
        dependencies_new = []
        for d in dependencies:
            d_new = TaskDependence(task_id=old2new[d.task_id],
                                   depend_id=old2new[d.depend_id])
            dependencies_new.append(d_new)

        self.task_provider.bulk_save_objects(dependencies_new,
                                             return_defaults=False)

        changes = yaml_load(self.file_changes)
        storages = self.dag_storage_provider.by_dag(self.dag)
        storages_new = []

        for s, f in storages:
            if not isinstance(changes, dict):
                continue

            replace = self.find_replace(changes, s.path)
            if replace is not None and f:
                content = f.content.decode('utf-8')
                if s.path.endswith('.yml'):
                    data = yaml_load(content)
                    data = merge_dicts_smart(data, replace)
                    content = yaml_dump(data)
                else:
                    for k, v in replace:
                        if k not in content:
                            raise Exception(f'{k} is not in the content')
                        content = content.replace(k, v)
                content = content.encode('utf-8')
                md5 = hashlib.md5(content).hexdigest()
                f = self.file_provider.by_md5(md5)
                if not f:
                    f = File(content=content,
                             created=now(),
                             project=self.dag_db.project,
                             md5=md5,
                             dag=self.dag_db.id)
                self.file_provider.add(f)

            s_new = DagStorage(dag=self.dag_db.id,
                               file=f.id,
                               path=s.path,
                               is_dir=s.is_dir)
            storages_new.append(s_new)

        self.dag_storage_provider.bulk_save_objects(storages_new,
                                                    return_defaults=False)