예제 #1
0
파일: app.py 프로젝트: xang1234/mlcomp
def projects():
    """
    Return the project listing for the current request.

    The request payload is passed to ``ProjectProvider.get`` together with
    paginator options built from the payload's ``'paginator'`` entry.
    """
    payload = request_data()
    paginator = PaginatorOptions(**payload['paginator'])

    return ProjectProvider(_read_session).get(payload, paginator)
예제 #2
0
파일: app.py 프로젝트: xang1234/mlcomp
def project_edit():
    """
    Edit a project using the fields of the current request.

    ``'class_names'`` and ``'ignore_folders'`` arrive as YAML text and are
    parsed before being handed to ``ProjectProvider.edit_project``.
    """
    payload = request_data()

    provider = ProjectProvider(_write_session)
    name = payload['name']
    class_names = yaml_load(payload['class_names'])
    ignore_folders = yaml_load(payload['ignore_folders'])
    return provider.edit_project(name, class_names, ignore_folders)
예제 #3
0
def sync(project: str, computer: str, only_from: bool, only_to: bool):
    """
    Run directed syncs of a project between one computer and all others.

    :param project: name of the project; must exist
    :param computer: computer name; falls back to this host's name
    :param only_from: when true, skip the ``(computer, other)`` sync call
    :param only_to: when true, skip the ``(other, computer)`` sync call
    """
    _create_computer()

    host_name = computer or socket.gethostname()
    computer_provider = ComputerProvider(_session)
    project_provider = ProjectProvider(_session)
    local = computer_provider.by_name(host_name)
    everyone = computer_provider.all()

    found = project_provider.by_name(project)
    assert found, f'Project={project} is not found'

    # ignore entries are stored as YAML; normalise each one to a string
    excluded = [str(entry) for entry in yaml_load(found.ignore_folders)]
    folders_excluded = [
        [join('data', found.name), excluded],
        [join('models', found.name), []],
    ]

    for other in everyone:
        if other.name == local.name:
            continue

        if not only_from:
            sync_directed(_session, local, other, folders_excluded)
        if not only_to:
            sync_directed(_session, other, local, folders_excluded)
예제 #4
0
파일: sync.py 프로젝트: shlemph/mlcomp
    def sync_manual(self, computer: Computer, provider: ComputerProvider):
        """
        Handle a sync requested manually (the sync button was clicked).

        Does nothing unless ``computer.meta`` contains a ``manual_sync``
        entry. Otherwise syncs onto ``computer`` from the computer of every
        online docker other than its own, then removes the ``manual_sync``
        entry from the meta and persists the change.
        """
        if not computer.meta:
            return

        meta = yaml_load(computer.meta)
        if 'manual_sync' not in meta:
            return

        request = meta['manual_sync']

        project_provider = ProjectProvider(self.session)
        online = DockerProvider(self.session).get_online()
        project = project_provider.by_id(request['project'])

        for docker in online:
            if docker.computer != computer.name:
                source = provider.by_name(docker.computer)
                models_folder = [join('models', project.name), []]
                sync_directed(
                    self.session,
                    target=computer,
                    source=source,
                    ignore_folders=[models_folder]
                )

        # the request has been served: drop it and store the updated meta
        del meta['manual_sync']
        computer.meta = yaml_dump(meta)
        provider.update()
예제 #5
0
def project_add():
    """
    Add a project described by the current request.

    ``'class_names'`` and ``'ignore_folders'`` are parsed from YAML text
    before being passed to ``ProjectProvider.add_project``.
    """
    payload = request_data()

    provider = ProjectProvider(_write_session)
    name = payload['name']
    class_names = yaml_load(payload['class_names'])
    ignore_folders = yaml_load(payload['ignore_folders'])
    provider.add_project(name, class_names, ignore_folders)
예제 #6
0
    def create_providers(self):
        """Create the providers and the storage, all bound to one session."""
        session = self.session

        # database providers
        self.provider = TaskProvider(session)
        self.report_provider = ReportProvider(session)
        self.report_tasks_provider = ReportTasksProvider(session)
        self.report_layout_provider = ReportLayoutProvider(session)
        self.project_provider = ProjectProvider(session)

        # file storage and DAG provider
        self.storage = Storage(session)
        self.dag_provider = DagProvider(session)
예제 #7
0
파일: standard.py 프로젝트: kiminh/mlcomp
    def create_providers(self):
        """
        Create the providers and the storage, all bound to one session.

        The storage additionally receives this object's logger and
        component.
        """
        self.log_info('create_providers')

        session = self.session

        # database providers
        self.provider = TaskProvider(session)
        self.report_provider = ReportProvider(session)
        self.report_tasks_provider = ReportTasksProvider(session)
        self.report_layout_provider = ReportLayoutProvider(session)
        self.project_provider = ProjectProvider(session)

        # file storage and DAG provider
        self.storage = Storage(
            session, logger=self.logger, component=self.component
        )
        self.dag_provider = DagProvider(session)
예제 #8
0
파일: sync.py 프로젝트: lightforever/mlcomp
    def sync_manual(self, computer: Computer, provider: ComputerProvider):
        """
        Handle a sync requested manually (the sync button was clicked).

        Does nothing unless ``computer.meta`` contains a ``manual_sync``
        entry. The entry names the project and the folders to sync and to
        ignore. For every online docker on another computer, that computer
        is recorded as ``computer.syncing_computer`` and a directed sync
        onto ``computer`` is run; sync errors go to ``process_error``.
        Finally the ``manual_sync`` entry is removed and the meta stored.
        """
        if not computer.meta:
            return

        meta = yaml_load(computer.meta)
        if 'manual_sync' not in meta:
            return

        request = meta['manual_sync']

        project_provider = ProjectProvider(self.session)
        online = DockerProvider(self.session).get_online()
        project = project_provider.by_id(request['project'])

        raw_sync = request['sync_folders']
        raw_ignore = request['ignore_folders']
        sync_folders = correct_folders(raw_sync, project.name)
        ignore_folders = correct_folders(raw_ignore, project.name)

        # anything that is not a list is treated as "no folders"
        sync_folders = sync_folders if isinstance(sync_folders, list) else []
        ignore_folders = (
            ignore_folders if isinstance(ignore_folders, list) else []
        )

        for docker in online:
            if docker.computer != computer.name:
                source = provider.by_name(docker.computer)
                folders = [[folder, ignore_folders] for folder in sync_folders]

                # record which computer we are syncing from before starting
                computer.syncing_computer = source.name
                provider.update()

                try:
                    sync_directed(self.session, target=computer,
                                  source=source, folders=folders)
                except Exception as e:
                    self.process_error(e)

        # the request has been served: drop it and store the updated meta
        del meta['manual_sync']
        computer.meta = yaml_dump(meta)
        provider.update()
예제 #9
0
파일: pipe.py 프로젝트: jingmouren/mlcomp
def dag_pipe(session: Session, config: dict, config_text: str = None):
    """
    Create a pipe DAG from ``config`` and upload the working directory.

    :param session: session used by all providers and the storage
    :param config: parsed config; must contain ``interfaces`` and ``pipes``
    :param config_text: text stored as the DAG's config
    """
    required = (('interfaces', 'interfaces missed'), ('pipes', 'pipe missed'))
    for key, message in required:
        assert key in config, message

    info = config['info']

    storage = Storage(session)
    dag_provider = DagProvider(session)

    cwd = os.getcwd()
    project_id = ProjectProvider(session).by_name(info['project']).id
    pipe_dag = Dag(
        config=config_text,
        project=project_id,
        name=info['name'],
        docker_img=info.get('docker_img'),
        type=DagType.Pipe.value
    )
    dag = dag_provider.add(pipe_dag)
    storage.upload(cwd, dag)

    # Re-point models that share this name to the newly created dag
    model_provider = ModelProvider(session)
    model_provider.change_dag(project=project_id, name=info['name'], to=dag.id)
예제 #10
0
    def work(self):
        """
        Register a model and, if a train task is set, export its files.

        With a train task, the model is traced from the checkpoint in the
        task's ``log`` folder, the traced model is copied to the project's
        models folder, and the ``best_full`` checkpoint is copied next to
        it as the weights file. The model record is added in either case.
        """
        project = ProjectProvider(self.session).by_id(self.project)

        self.info(f'Task = {self.train_task} child_task: {self.child_task}')

        model = Model(
            created=now(),
            name=self.name,
            project=self.project,
            equations='',
            fold=self.fold
        )
        provider = ModelProvider(self.session)

        if not self.train_task:
            provider.add(model)
            return

        train_task = TaskProvider(self.session).by_id(self.train_task)
        model.score_local = train_task.score

        # files live in the child task's folder when a child task is given
        task_folder = join(TASK_FOLDER, str(self.child_task or train_task.id))
        log_dir = f'{task_folder}/log'
        target_dir = join(MODEL_FOLDER, project.name)
        os.makedirs(target_dir, exist_ok=True)

        traced_tmp = f'{log_dir}/traced.pth'
        traced = trace_model_from_checkpoint(log_dir, self, file=self.file)

        traced_dst = f'{target_dir}/{model.name}.pth'
        weights_dst = f'{target_dir}/{model.name}_weight.pth'
        torch.jit.save(traced, traced_tmp)
        shutil.copy(traced_tmp, traced_dst)

        # the weights always come from the 'best_full' checkpoint
        checkpoint = self.file = 'best_full'
        shutil.copy(f'{log_dir}/checkpoints/{checkpoint}.pth', weights_dst)

        provider.add(model)
예제 #11
0
def sync(project: str, computer: str, only_from: bool, only_to: bool,
         online: bool):
    """
    Syncs specified project on this computer with other computers

    :param project: name of the project; must exist
    :param computer: computer name; falls back to this host's name
    :param only_from: when true, skip the ``(computer, other)`` sync call
    :param only_to: when true, skip the ``(other, computer)`` sync call
    :param online: when true, skip computers whose last activity is more
        than 100 seconds old
    """
    check_statuses()

    _create_computer()
    _create_docker()

    host_name = computer or socket.gethostname()
    computer_provider = ComputerProvider(_session)
    project_provider = ProjectProvider(_session)
    local = computer_provider.by_name(host_name)
    everyone = computer_provider.all_with_last_activtiy()

    found = project_provider.by_name(project)
    assert found, f'Project={project} is not found'

    raw_sync = yaml_load(found.sync_folders)
    raw_ignore = yaml_load(found.ignore_folders)
    sync_folders = correct_folders(raw_sync, found.name)
    ignore_folders = correct_folders(raw_ignore, found.name)

    # anything that is not a list is treated as "no folders"
    sync_folders = sync_folders if isinstance(sync_folders, list) else []
    ignore_folders = ignore_folders if isinstance(ignore_folders, list) else []

    folders = [[folder, ignore_folders] for folder in sync_folders]

    for other in everyone:
        if other.name == local.name:
            continue

        if online:
            idle_seconds = (now() - other.last_activity).total_seconds()
            if idle_seconds > 100:
                continue

        if not only_from:
            sync_directed(_session, local, other, folders)
        if not only_to:
            sync_directed(_session, other, local, folders)
예제 #12
0
    def work(self):
        """
        Register a model and, if a train task is set, export its files.

        With a train task, the log directory is read from the catalyst
        config referenced by the task's executor in the DAG config. The
        model is traced from the checkpoint in that directory, copied to
        the project's models folder, and the ``best_full`` checkpoint is
        copied next to it as the weights file. The model record is added
        in either case.
        """
        project = ProjectProvider(self.session).by_id(self.project)

        self.info(f'Task = {self.train_task} child_task: {self.child_task}')

        model = Model(created=now(),
                      name=self.name,
                      project=self.project,
                      equations='',
                      fold=self.fold)
        provider = ModelProvider(self.session)

        if not self.train_task:
            provider.add(model)
            return

        task_provider = TaskProvider(self.session)
        dag_provider = DagProvider(self.session)
        train_task = task_provider.by_id(self.train_task)
        dag = dag_provider.by_id(train_task.dag)

        # files live in the child task's folder when a child task is given
        task_folder = join(TASK_FOLDER, str(self.child_task or train_task.id))

        # resolve the log directory from the executor's catalyst config
        dag_config = yaml_load(dag.config)
        executor = dag_config['executors'][train_task.executor]
        catalyst_file = join(task_folder, executor['args']['config'])
        catalyst = yaml_load(file=catalyst_file)
        logdir = catalyst['args']['logdir']

        model.score_local = train_task.score

        log_dir = f'{task_folder}/{logdir}'
        target_dir = join(MODEL_FOLDER, project.name)
        os.makedirs(target_dir, exist_ok=True)

        traced_tmp = f'{log_dir}/traced.pth'
        traced = trace_model_from_checkpoint(log_dir, self, file=self.file)

        traced_dst = f'{target_dir}/{model.name}.pth'
        weights_dst = f'{target_dir}/{model.name}_weight.pth'
        torch.jit.save(traced, traced_tmp)
        shutil.copy(traced_tmp, traced_dst)

        # the weights always come from the 'best_full' checkpoint
        checkpoint = self.file = 'best_full'
        shutil.copy(f'{log_dir}/checkpoints/{checkpoint}.pth', weights_dst)

        provider.add(model)
예제 #13
0
def dag_model_add(session: Session, data: dict):
    """
    Add a model directly, or schedule a ``model_add`` DAG for a task.

    Without a ``task`` entry in ``data`` the model record is created at
    once. Otherwise a one-executor DAG config is built and passed to
    ``dag_standard``; it targets the computer assigned to the task's first
    child when children exist, else the task's own computer.
    """
    task_id = data.get('task')
    if not task_id:
        ModelProvider(session).add(
            Model(
                name=data['name'],
                project=data['project'],
                equations=data['equations'],
                created=now()
            )
        )
        return

    task_provider = TaskProvider(session)
    load_dag = joinedload(Task.dag_rel, innerjoin=True)
    task = task_provider.by_id(data['task'], options=load_dag)
    child_tasks = task_provider.children(task.id)

    computer = task.computer_assigned
    child_task = None
    if len(child_tasks) > 0:
        first_child = child_tasks[0]
        child_task, computer = first_child.id, first_child.computer_assigned

    project = ProjectProvider(session).by_id(task.dag_rel.project)

    info = {
        'name': 'model_add',
        'project': project.name,
        'computer': computer
    }
    executor = {
        'type': 'model_add',
        'project': data['project'],
        'task': data.get('task'),
        'name': data['name'],
        'file': data['file'],
        'child_task': child_task,
        'fold': data['fold']
    }
    config = {'info': info, 'executors': {'model_add': executor}}

    dag_standard(
        session=session, config=config, debug=False, upload_files=False
    )
예제 #14
0
def dag_model_add(session: Session, data: dict):
    """
    Schedule a ``model_add`` DAG for the task named in ``data``.

    A one-executor DAG config is built and passed to ``dag_standard``; it
    targets the computer assigned to the task's first child when children
    exist, else the task's own computer. ``interface_params`` is parsed
    from YAML text (an empty string when absent).
    """
    task_provider = TaskProvider(session)
    load_dag = joinedload(Task.dag_rel, innerjoin=True)
    task = task_provider.by_id(data['task'], options=load_dag)
    child_tasks = task_provider.children(task.id)

    computer = task.computer_assigned
    child_task = None
    if len(child_tasks) > 0:
        first_child = child_tasks[0]
        child_task, computer = first_child.id, first_child.computer_assigned

    project = ProjectProvider(session).by_id(task.dag_rel.project)
    interface_params = yaml_load(data.get('interface_params', ''))

    info = {
        'name': 'model_add',
        'project': project.name,
        'computer': computer
    }
    executor = {
        'type': 'model_add',
        'dag': data['dag'],
        'slot': data['slot'],
        'interface': data['interface'],
        'task': data.get('task'),
        'name': data['name'],
        'interface_params': interface_params,
        'child_task': child_task
    }
    config = {'info': info, 'executors': {'model_add': executor}}

    dag_standard(
        session=session, config=config, debug=False, upload_files=False
    )
예제 #15
0
파일: app.py 프로젝트: xang1234/mlcomp
def report_add_start():
    """
    Return the data needed to start adding a report.

    The result holds the ``'data'`` part of the project listing and of the
    report layout listing.
    """
    projects = ProjectProvider(_read_session).get()['data']
    layouts = ReportLayoutProvider(_read_session).get()['data']
    return {'projects': projects, 'layouts': layouts}
예제 #16
0
파일: standard.py 프로젝트: kiminh/mlcomp
class DagStandardBuilder:
    """
    Builds a standard DAG from a parsed config.

    ``build()`` runs the steps in order: create providers, resolve the
    project, create the optional report, create the DAG record, upload or
    copy files, and create the tasks with their dependencies. It returns
    an ordered mapping of executor name to the ids of its tasks.
    """

    def __init__(self,
                 session: Session,
                 config: dict,
                 debug: bool,
                 config_text: str = None,
                 upload_files: bool = True,
                 copy_files_from: int = None,
                 config_path: str = None,
                 control_reqs: bool = True,
                 logger=None,
                 component: ComponentType = None):
        """
        :param session: session shared by every provider and the storage
        :param config: parsed config; must contain ``info``
        :param debug: value stored in each task's ``debug`` field
        :param config_text: text stored as the DAG config; when empty the
            dumped ``config`` is stored instead
        :param upload_files: upload the experiment folder for the DAG
        :param copy_files_from: used when ``upload_files`` is false: the
            value passed to ``storage.copy_from`` as the source
        :param config_path: path of the config file; the uploaded folder
            is resolved relative to it
        :param control_reqs: forwarded to ``storage.upload``
        :param logger: optional logger used by ``log_info``
        :param component: component passed along with each log message
        """
        self.session = session
        self.config = config
        self.debug = debug
        self.config_text = config_text
        self.upload_files = upload_files
        self.copy_files_from = copy_files_from
        self.config_path = config_path
        self.control_reqs = control_reqs

        self.info = config['info']
        self.layout_name = self.info.get('layout')

        # providers and storage are created lazily by create_providers()
        self.provider = None
        self.report_provider = None
        self.report_tasks_provider = None
        self.report_layout_provider = None
        self.storage = None
        self.dag_provider = None
        self.logger = logger
        self.component = component

        # state filled in by the build steps
        self.project = None
        self.layouts = None
        self.dag = None
        self.dag_report_id = None
        self.created = None
        self.project_provider = None

    def log_info(self, message: str):
        """Log ``message`` at info level if a logger was supplied."""
        if self.logger:
            self.logger.info(message, self.component)

    def create_providers(self):
        """Create the providers and the storage bound to the session."""
        self.log_info('create_providers')

        self.provider = TaskProvider(self.session)
        self.report_provider = ReportProvider(self.session)
        self.report_tasks_provider = ReportTasksProvider(self.session)
        self.report_layout_provider = ReportLayoutProvider(self.session)
        self.project_provider = ProjectProvider(self.session)

        self.storage = Storage(self.session,
                               logger=self.logger,
                               component=self.component)
        self.dag_provider = DagProvider(self.session)

    def load_base(self):
        """Resolve the project id (adding the project if it is missing)
        and load all report layouts."""
        self.log_info('load_base')

        project = self.project_provider.by_name(self.info['project'])
        if project is None:
            project = self.project_provider.add_project(self.info['project'])

        self.project = project.id
        self.layouts = self.report_layout_provider.all()

    def create_report(self):
        """
        Create the DAG-level report when the config names a layout.

        :raises Exception: if the layout name is not a known layout
        """
        self.log_info('create_report')

        self.dag_report_id = None
        layout_name = self.layout_name
        if layout_name:
            if layout_name not in self.layouts:
                raise Exception(f'Unknown layout = {layout_name}')

            report = Report(config=yaml_dump(self.layouts[layout_name]),
                            name=self.info['name'],
                            project=self.project,
                            layout=layout_name)
            self.report_provider.add(report)
            self.dag_report_id = report.id

    def create_dag(self):
        """Create and store the DAG record for this config."""
        self.log_info('create_dag')

        dag = Dag(config=self.config_text or yaml_dump(self.config),
                  project=self.project,
                  name=self.info['name'],
                  docker_img=self.info.get('docker_img'),
                  type=DagType.Standard.value,
                  created=now(),
                  report=self.dag_report_id)

        self.dag = self.dag_provider.add(dag)

    def upload(self):
        """
        Attach files to the DAG.

        With ``upload_files`` set, uploads the config file's folder, or
        the ``expdir`` from ``info`` resolved relative to that folder.
        Otherwise copies files from ``copy_files_from`` when it is set.
        """
        self.log_info('upload')

        if self.upload_files:
            config_dir = os.path.dirname(os.path.abspath(self.config_path))
            folder = config_dir
            if 'expdir' in self.config['info']:
                folder = os.path.abspath(
                    os.path.join(config_dir, self.config['info']['expdir']))
            self.storage.upload(folder,
                                self.dag,
                                control_reqs=self.control_reqs)
        elif self.copy_files_from:
            self.storage.copy_from(self.copy_files_from, self.dag)

    def create_task(self, k: str, v: dict, name: str, info: dict):
        """
        Build (without saving) the task for one executor, plus its report.

        :param k: executor name
        :param v: executor config
        :param name: task name
        :param info: additional info dumped into the task; receives a
            ``report_config`` entry when a report is created
        :return: ``(task, report)``; ``report`` is ``None`` unless a layout
            is configured and the task is a train task
        :raises Exception: on ``gpu=0`` with a positive upper bound, or on
            an unknown layout name
        """
        task_type = TaskType.User.value
        if v.get('task_type') == 'train' or \
                Executor.is_trainable(v['type']):
            task_type = TaskType.Train.value

        # gpu is either a single number or a 'min-max' range
        gpu = str(v.get('gpu', '0'))
        if '-' not in gpu:
            gpu = int(gpu)
            gpu_max = gpu
        else:
            gpu, gpu_max = map(int, gpu.split('-'))

        if gpu == 0 and gpu_max > 0:
            raise Exception(f'Executor = {k} Gpu_max can"t be>0 when gpu=0')

        task = Task(name=name,
                    executor=k,
                    computer=self.info.get('computer') or v.get('computer'),
                    gpu=gpu,
                    gpu_max=gpu_max,
                    cpu=v.get('cpu', 1),
                    memory=v.get('memory', 0.1),
                    dag=self.dag.id,
                    debug=self.debug,
                    steps=int(v.get('steps', '1')),
                    type=task_type)
        task.additional_info = yaml_dump(info)
        report = None
        if self.layout_name and task_type == TaskType.Train.value:
            if self.layout_name not in self.layouts:
                # report the name that was actually checked; the executor
                # config is not required to carry a 'report' key
                raise Exception(f'Unknown report = {self.layout_name}')

            report_config = self.layouts[self.layout_name]
            info['report_config'] = report_config

            # dump again so the stored info includes the report config
            task.additional_info = yaml_dump(info)
            report = Report(config=yaml_dump(report_config),
                            name=task.name,
                            project=self.project,
                            layout=self.layout_name)

        return task, report

    @staticmethod
    def _depends_of(v: dict) -> list:
        """Return the executor's ``depends`` entry as a list."""
        if 'depends' not in v:
            return []
        depends = v['depends']
        return depends if isinstance(depends, list) else [depends]

    def create_tasks(self):
        """
        Create and save tasks, reports and dependencies for all executors.

        Executors are processed in passes: an executor is built once all
        executors it depends on have been built. An executor with a
        ``grid`` produces one task per grid cell. On success
        ``self.created`` maps each executor name to its task ids.

        :raises Exception: if an executor depends on itself, on an unknown
            executor, or if the dependencies are cyclic
        """
        self.log_info('create_tasks')

        created = OrderedDict()
        executors = self.config['executors']

        tasks = []
        dependencies = []
        reports = []

        while len(created) < len(executors):
            progressed = False
            for k, v in executors.items():
                if k in created:
                    # built in an earlier pass; building it again would
                    # duplicate its tasks
                    continue

                depends = self._depends_of(v)
                for d in depends:
                    if d == k:
                        raise Exception(f'Executor {k} depends on itself')

                    if d not in executors:
                        raise Exception(f'Executor {k} depend on {d} '
                                        f'which does not exist')

                if not all(d in created for d in depends):
                    # wait for a later pass
                    continue

                names = []
                infos = []
                if 'grid' in v:
                    cells = grid_cells(v['grid'])
                    for i, (cell, cell_name) in enumerate(cells):
                        names.append(cell_name)
                        infos.append({'grid_cell': i})
                else:
                    names.append(v.get('name', k))
                    infos.append({})

                k_tasks = []
                for name, info in zip(names, infos):
                    task, report = self.create_task(k,
                                                    v,
                                                    name=name,
                                                    info=info)
                    tasks.append(task)
                    k_tasks.append(task)
                    reports.append(report)

                    # every task depends on every task of its dependencies
                    for d in depends:
                        for dd in created[d]:
                            dependencies.append((task, dd))

                created[k] = k_tasks
                progressed = True

            if not progressed:
                # a full pass built nothing: the rest can never be built
                pending = [k for k in executors if k not in created]
                raise Exception(
                    f'Executors have cyclic dependencies: {pending}')

        # reports are saved first so tasks can reference their ids
        not_empty_reports = [r for r in reports if r is not None]
        if len(not_empty_reports) > 0:
            self.provider.bulk_save_objects(not_empty_reports,
                                            return_defaults=True)
            for report, task in zip(reports, tasks):
                if report is not None:
                    task.report = report.id

        self.provider.bulk_save_objects(tasks, return_defaults=True)

        if len(not_empty_reports) > 0:
            report_tasks = []
            for report, task in zip(reports, tasks):
                if report is not None:
                    report_tasks.append(
                        ReportTasks(report=report.id, task=task.id))
            self.report_tasks_provider.bulk_save_objects(report_tasks)

        # task ids exist only after the tasks have been saved
        dependencies = [
            TaskDependence(task_id=task.id, depend_id=dd.id)
            for task, dd in dependencies
        ]
        self.provider.bulk_save_objects(dependencies)

        for k, v in created.items():
            created[k] = [vv.id for vv in v]
        self.created = created

    def build(self):
        """
        Run every build step in order.

        :return: ordered mapping of executor name to created task ids
        """
        self.create_providers()

        self.load_base()

        self.create_report()

        self.create_dag()

        self.upload()

        self.create_tasks()

        self.log_info('Done')

        return self.created
예제 #17
0
 def _configure(self, session):
     """Return a project provider on ``session`` after adding the
     project named ``'test'``."""
     project_provider = ProjectProvider(session)
     project_provider.add_project(name='test')
     return project_provider
예제 #18
0
 def test_add(self, session: Session):
     """A project added through the provider can be fetched by its id."""
     projects = ProjectProvider(session)
     added = projects.add_project(name='test')
     assert projects.by_id(added.id)
예제 #19
0
파일: app.py 프로젝트: xang1234/mlcomp
def project_remove():
    """Remove the project whose id is given in the current request."""
    project_id = request_data()['id']
    ProjectProvider(_write_session).remove(project_id)