Ejemplo n.º 1
0
    def work(self):
        """Register a new ``Model`` and export its files when a train task is set.

        A ``Model`` is built from ``self.name``, ``self.project`` and
        ``self.fold``. When ``self.train_task`` is truthy, the task's score is
        copied to ``score_local``, the checkpoint found in the task's ``log``
        directory is traced with ``trace_model_from_checkpoint`` and saved via
        ``torch.jit.save``, and both the traced file and the ``best_full``
        checkpoint are copied into ``MODEL_FOLDER/<project name>``.

        The model is handed to ``ModelProvider.add`` in every case.

        Side effect: ``self.file`` is overwritten with ``'best_full'`` after
        the tracing step.
        """
        project = ProjectProvider(self.session).by_id(self.project)

        self.info(f'Task = {self.train_task} child_task: {self.child_task}')

        model = Model(
            created=now(),
            name=self.name,
            project=self.project,
            equations='',
            fold=self.fold,
        )

        provider = ModelProvider(self.session)
        if self.train_task:
            task = TaskProvider(self.session).by_id(self.train_task)
            model.score_local = task.score

            # The child task's folder takes precedence over the train task's.
            task_folder = join(TASK_FOLDER, str(self.child_task or task.id))
            log_dir = f'{task_folder}/log'
            target_dir = join(MODEL_FOLDER, project.name)
            os.makedirs(target_dir, exist_ok=True)

            traced_tmp = f'{log_dir}/traced.pth'
            traced = trace_model_from_checkpoint(
                log_dir, self, file=self.file
            )

            traced_dst = f'{target_dir}/{model.name}.pth'
            weights_dst = f'{target_dir}/{model.name}_weight.pth'

            # The traced module is written next to the logs first, then
            # copied into the project's model directory.
            torch.jit.save(traced, traced_tmp)
            shutil.copy(traced_tmp, traced_dst)

            # The raw weights always come from the 'best_full' checkpoint.
            checkpoint_name = self.file = 'best_full'
            shutil.copy(
                f'{log_dir}/checkpoints/{checkpoint_name}.pth', weights_dst
            )

        provider.add(model)
Ejemplo n.º 2
0
def model_remove():
    """Remove the model whose ``id`` is given in the request data.

    The model is loaded together with its project relation, then
    ``celery_tasks.remove_model`` is called with the project name and the
    model name, and finally the record is removed through ``ModelProvider``.
    """
    payload = request_data()
    provider = ModelProvider(_write_session)
    model = provider.by_id(payload['id'], joined_load=[Model.project_rel])

    project_name = model.project_rel.name
    celery_tasks.remove_model(_write_session, project_name, model.name)

    provider.remove(model.id)
Ejemplo n.º 3
0
    def work(self):
        """Score every part, store the final score on the task and the model.

        Steps:
          1. ``create_base`` is called and the parts are generated from
             ``count()``.
          2. Each prediction batch yielded by ``solve`` is scored and, when a
             layout is set and ``plot_count`` is positive, plotted.
          3. ``score_final`` provides the overall score; NaN is stored as
             ``-1``, anything else is converted to ``float``.
          4. The score is written to ``self.task`` (followed by
             ``task_provider.update()``) and, when ``self.model_id`` is set,
             to the model's ``score_local``.
        """
        self.create_base()
        parts = self.generate_parts(self.count())

        for preds in self.solve(self.key(), parts):
            part_score = self.score(preds)
            # Conditions are re-read on every iteration on purpose: they are
            # instance attributes and may change while plotting.
            if self.layout and self.plot_count > 0:
                self.plot(preds, part_score)

        final_score = self.score_final()
        # NaN is replaced by the sentinel -1.
        final_score = -1 if isnan(final_score) else float(final_score)

        if self.layout:
            self.plot_final(final_score)

        self.task.score = final_score
        self.task_provider.update()

        if not self.model_id:
            return

        provider = ModelProvider(self.session)
        provider.by_id(self.model_id).score_local = final_score
        provider.commit()
Ejemplo n.º 4
0
def dag_model_start(session: Session, data: dict):
    """Start a pipe of a dag for a model and remember the used equations.

    ``data`` is read for the keys ``model_id``, ``dag`` and ``pipe``; the
    latter must hold ``name`` and ``versions`` (plus ``version`` when
    ``versions`` is not empty).

    When versions are supplied, the selected version's equations are merged
    into the pipe configuration and the matching entry of ``versions`` (the
    first entry when no name matches) gets its ``used`` field set to
    ``now()``. The versions are stored in ``model.equations`` under the pipe
    name, every executor of the pipe receives ``model_id``/``model_name``,
    the model's dag reference is refreshed, and a new dag is started through
    ``dag_standard`` with files copied from the source dag.
    """
    provider = ModelProvider(session)
    model = provider.by_id(data['model_id'])
    dag_provider = DagProvider(session)
    dag = dag_provider.by_id(data['dag'], joined_load=[Dag.project_rel])

    project = dag.project_rel
    src_config = Config.from_yaml(dag.config)
    pipes = src_config['pipes']
    pipe_name = data['pipe']['name']
    pipe = pipes[pipe_name]

    equations = yaml_load(model.equations)
    versions = data['pipe']['versions']

    if len(versions) > 0:
        version = data['pipe']['version']
        pipe_equations = yaml_load(version['equations'])

        # First entry with the selected name; falls back to the first entry.
        found_version = next(
            (item for item in versions if item['name'] == version['name']),
            versions[0],
        )
        found_version['used'] = now()

        # A pipe with a single executor gets the equations on that executor;
        # otherwise they are merged at pipe level.
        target = pipe[list(pipe)[0]] if len(pipe) == 1 else pipe
        target.update(pipe_equations)

    equations[data['pipe']['name']] = versions
    model.equations = yaml_dump(equations)

    for executor in pipe.values():
        executor['model_id'] = model.id
        executor['model_name'] = model.name

    config = {
        'info': {
            'name': data['pipe']['name'],
            'project': project.name,
        },
        'executors': pipe,
    }

    # Re-point the model only when it has no dag yet or when the dag it
    # references carries a different name.
    if not model.dag or dag_provider.by_id(model.dag).name != dag.name:
        model.dag = dag.id

    provider.commit()

    dag_standard(
        session=session,
        config=config,
        debug=False,
        upload_files=False,
        copy_files_from=data['dag'],
    )
Ejemplo n.º 5
0
    def work(self):
        """Register a new ``Model`` and export its files when a train task is set.

        A ``Model`` is built from ``self.name``, ``self.project`` and
        ``self.fold``. When ``self.train_task`` is truthy:

          * the log directory is resolved from configuration: the dag config
            names the executor's config file (``args.config``), and that
            file's ``args.logdir`` is used, relative to the task folder;
          * the task's score is copied to ``score_local``;
          * the checkpoint is traced with ``trace_model_from_checkpoint``,
            saved via ``torch.jit.save`` and copied, together with the
            ``best_full`` checkpoint, into ``MODEL_FOLDER/<project name>``.

        The model is handed to ``ModelProvider.add`` in every case.

        Side effect: ``self.file`` is overwritten with ``'best_full'`` after
        the tracing step.
        """
        project = ProjectProvider(self.session).by_id(self.project)

        self.info(f'Task = {self.train_task} child_task: {self.child_task}')

        model = Model(
            created=now(),
            name=self.name,
            project=self.project,
            equations='',
            fold=self.fold,
        )

        provider = ModelProvider(self.session)
        if self.train_task:
            task_provider = TaskProvider(self.session)
            dag_provider = DagProvider(self.session)
            task = task_provider.by_id(self.train_task)
            dag = dag_provider.by_id(task.dag)

            # The child task's folder takes precedence over the train task's.
            task_folder = join(TASK_FOLDER, str(self.child_task or task.id))

            # Follow dag config -> executor config file -> logdir.
            dag_config = yaml_load(dag.config)
            executor_args = dag_config['executors'][task.executor]['args']
            nested_config_path = join(task_folder, executor_args['config'])
            nested_config = yaml_load(file=nested_config_path)
            logdir = nested_config['args']['logdir']

            model.score_local = task.score

            log_dir = f'{task_folder}/{logdir}'
            target_dir = join(MODEL_FOLDER, project.name)
            os.makedirs(target_dir, exist_ok=True)

            traced_tmp = f'{log_dir}/traced.pth'
            traced = trace_model_from_checkpoint(
                log_dir, self, file=self.file
            )

            traced_dst = f'{target_dir}/{model.name}.pth'
            weights_dst = f'{target_dir}/{model.name}_weight.pth'

            # The traced module is written next to the logs first, then
            # copied into the project's model directory.
            torch.jit.save(traced, traced_tmp)
            shutil.copy(traced_tmp, traced_dst)

            # The raw weights always come from the 'best_full' checkpoint.
            checkpoint_name = self.file = 'best_full'
            shutil.copy(
                f'{log_dir}/checkpoints/{checkpoint_name}.pth', weights_dst
            )

        provider.add(model)
Ejemplo n.º 6
0
    def __init__(
        self,
        model_id: int = None,
        suffix: str = '',
        max_count=None,
        part_size: int = None,
        cache_names: List[str] = (),
        **kwargs
    ):
        """Store the executor settings and resolve the model/executor names.

        Args:
            model_id: id of the model to look up when no ``model_name``
                keyword is supplied.
            suffix: raw suffix; the stored value is the result of
                ``self._solve(suffix)``.
            max_count: stored as-is on the instance.
            part_size: stored as-is on the instance.
            cache_names: stored as-is on the instance.
            **kwargs: forwarded to the parent constructor and additionally
                copied onto the instance as attributes. ``model_name`` and
                ``name`` are read from here.

        ``name`` falls back to ``model_name`` when it is missing or empty.
        """
        super().__init__(**kwargs)

        # Every extra keyword argument becomes an instance attribute.
        self.__dict__.update(kwargs)

        self.model_id = model_id
        self.suffix = suffix
        self.max_count = max_count
        self.part_size = part_size
        self.part = None
        self.cache = {}
        self.cache_names = cache_names

        self.model_name = kwargs.get('model_name')
        self.name = kwargs.get('name')

        # Without an explicit model name, look it up by id.
        if self.model_id and not self.model_name:
            model = ModelProvider(self.session).by_id(self.model_id)
            self.model_name = model.name

        if not self.name:
            self.name = self.model_name

        # Resolved last, after the names above are in place.
        self.suffix = self._solve(suffix)
Ejemplo n.º 7
0
def dag_pipe(session: Session, config: dict, config_text: str = None):
    """Create a dag of type ``Pipe`` and upload the current directory for it.

    Args:
        session: database session handed to the providers and the storage.
        config: parsed configuration; must contain ``interfaces``, ``pipes``
            and ``info`` (with ``project`` and ``name``; ``docker_img`` is
            optional).
        config_text: stored as the dag's ``config`` field.

    Raises:
        AssertionError: if ``interfaces`` or ``pipes`` is missing.

    After the upload, ``ModelProvider.change_dag`` is called with the
    project, the dag name and the new dag's id.
    """
    assert 'interfaces' in config, 'interfaces missed'
    assert 'pipes' in config, 'pipe missed'

    info = config['info']

    storage = Storage(session)
    dag_provider = DagProvider(session)

    working_dir = os.getcwd()
    project_id = ProjectProvider(session).by_name(info['project']).id

    pipe_dag = Dag(
        config=config_text,
        project=project_id,
        name=info['name'],
        docker_img=info.get('docker_img'),
        type=DagType.Pipe.value,
    )
    dag = dag_provider.add(pipe_dag)
    storage.upload(working_dir, dag)

    # Change model dags which have the same name
    model_provider = ModelProvider(session)
    model_provider.change_dag(project=project_id, name=info['name'], to=dag.id)
Ejemplo n.º 8
0
    def work(self):
        """Write the submission csv, run the parent ``work`` and store its score.

        The array loaded from ``self.prob_file`` is reduced with
        ``argmax(axis=1)``; the result is written to ``self.out_file`` as the
        ``Label`` column next to a 1-based ``ImageId`` column. The value
        returned by the parent's ``work`` is saved as the model's
        ``score_public`` when ``self.model_id`` is set.
        """
        self.message = f'Task id = {self.task.id}'

        labels = np.load(self.prob_file).argmax(axis=1)
        image_ids = np.arange(1, len(labels) + 1)
        submission = pd.DataFrame({'ImageId': image_ids, 'Label': labels})
        submission.to_csv(self.out_file, index=False)

        score = super().work()

        if not self.model_id:
            return

        provider = ModelProvider(self.session)
        provider.by_id(self.model_id).score_public = score
        provider.commit()
Ejemplo n.º 9
0
def dag_model_start(session: Session, data: dict):
    """Start a pipe of a dag with a model plugged into the requested slot.

    ``data`` is read for the keys ``model_id``, ``dag``, ``pipe``, ``slot``,
    ``interface`` and ``interface_params``.

    Every executor of the pipe whose ``slot`` equals ``data['slot']`` has
    that entry replaced by a dict describing the interface, its parsed
    parameters, the executor's key and the model. The pipe is then started
    through ``dag_standard`` with files copied from the source dag, and the
    dag, interface, raw interface parameters and slot are saved on the model.
    """
    provider = ModelProvider(session)
    model = provider.by_id(data['model_id'])
    dag_provider = DagProvider(session)
    dag = dag_provider.by_id(data['dag'], joined_load=[Dag.project_rel])

    project = dag.project_rel
    src_config = Config.from_yaml(dag.config)
    pipe = src_config['pipes'][data['pipe']]

    for executor_name, executor in pipe.items():
        if executor.get('slot') == data['slot']:
            params = yaml_load(data['interface_params'])
            executor['slot'] = {
                'interface': data['interface'],
                'interface_params': params,
                'slot': executor_name,
                'name': model.name,
                'id': data['model_id'],
            }

    info = {'name': data['pipe'], 'project': project.name}
    config = {'info': info, 'executors': pipe}

    dag_standard(
        session=session,
        config=config,
        debug=False,
        upload_files=False,
        copy_files_from=data['dag'],
    )

    # Persist the choice on the model only after the dag has been created.
    model.dag = data['dag']
    model.interface = data['interface']
    model.interface_params = data['interface_params']
    model.slot = data['slot']

    provider.commit()
Ejemplo n.º 10
0
    def work(self):
        """Submit to the competition and poll until the new submission is scored.

        The submission refs present before submitting are remembered, so
        that the first listed submission with an unknown ref can be picked
        up afterwards. Polling happens every 10 seconds for at most
        ``self.wait_seconds`` seconds.

        Returns:
            ``{'res': score}`` with the public score as ``float``. When
            ``self.model_id`` is set, the score is also stored as the
            model's ``score_public``.

        Raises:
            Exception: if the submission completes without a public score,
                if its status is ``error``, or if it is not complete after
                the waiting time.
        """
        known_refs = {
            s.ref for s in api.competition_submissions(self.competition)
        }

        if self.submit_type == 'file':
            self.file_submit()
        else:
            self.kernel_submit()

        self.info('waiting for the submission on Kaggle')

        step = 10
        attempts = int(self.wait_seconds // step)
        for _ in range(attempts):
            try:
                submissions = api.competition_submissions(self.competition)
                # Only the first submission with an unknown ref is inspected.
                fresh = next(
                    (s for s in submissions if s.ref not in known_refs),
                    None,
                )
                if fresh is not None:
                    if fresh.status == 'complete':
                        if fresh.publicScore is None:
                            raise Exception('Submission is complete, '
                                            'but publicScore is None')
                        score = float(fresh.publicScore)
                        if self.model_id:
                            provider = ModelProvider(self.session)
                            model = provider.by_id(self.model_id)
                            model.score_public = score
                            provider.commit()

                        return {'res': score}

                    if fresh.status == 'error':
                        raise Exception(f'Submission error '
                                        f'on Kaggle: {fresh.errorDescription}')
            except TypeError:
                # A TypeError raised while polling is ignored and the poll
                # is retried on the next step.
                pass

            time.sleep(step)

        raise Exception(f'Submission is not '
                        f'complete after {self.wait_seconds}')
Ejemplo n.º 11
0
def dag_model_add(session: Session, data: dict):
    """Add a model directly, or start a ``model_add`` dag when a task is given.

    Without ``data['task']`` a ``Model`` is created from ``name``,
    ``project`` and ``equations`` and added right away.

    Otherwise the task is loaded together with its dag, and a dag holding a
    single ``model_add`` executor is started through ``dag_standard``. The
    dag is assigned to the computer of the task's first child when children
    exist, else to the task's own computer; the first child's id is passed
    on as ``child_task``.
    """
    if not data.get('task'):
        model = Model(
            name=data['name'],
            project=data['project'],
            equations=data['equations'],
            created=now(),
        )
        ModelProvider(session).add(model)
        return

    task_provider = TaskProvider(session)
    load_dag = joinedload(Task.dag_rel, innerjoin=True)
    task = task_provider.by_id(data['task'], options=load_dag)

    child_tasks = task_provider.children(task.id)
    computer = task.computer_assigned
    child_task = None
    if len(child_tasks) > 0:
        first_child = child_tasks[0]
        child_task = first_child.id
        computer = first_child.computer_assigned

    project = ProjectProvider(session).by_id(task.dag_rel.project)

    info = {
        'name': 'model_add',
        'project': project.name,
        'computer': computer,
    }
    executor = {
        'type': 'model_add',
        'project': data['project'],
        'task': data.get('task'),
        'name': data['name'],
        'file': data['file'],
        'child_task': child_task,
        'fold': data['fold'],
    }
    config = {'info': info, 'executors': {'model_add': executor}}

    dag_standard(
        session=session,
        config=config,
        debug=False,
        upload_files=False,
    )
Ejemplo n.º 12
0
    def work(self):
        """Trace the trained checkpoint and register it as a slot-bound model.

        The checkpoint in the task's ``log`` directory is traced with
        ``trace_model_from_checkpoint``. A ``Model`` bound to
        ``self.dag_pipe``, ``self.interface`` and ``self.slot`` is added
        without committing; then the traced file and ``checkpoints/best.pth``
        are copied into ``MODEL_FOLDER/<project name>`` and the model's
        interface parameters receive a ``file`` entry pointing at the traced
        file.

        Raises:
            Exception: anything raised while saving/copying the files or
                updating the model is re-raised after ``provider.rollback()``.
        """
        task = TaskProvider(self.session).by_id(self.train_task)
        dag_provider = DagProvider(self.session)
        dag = dag_provider.by_id(self.dag_pipe, joined_load=[Dag.project_rel])

        # The child task's folder takes precedence over the train task's.
        task_folder = join(TASK_FOLDER, str(self.child_task or task.id))
        log_dir = f'{task_folder}/log'
        target_dir = join(MODEL_FOLDER, dag.project_rel.name)
        os.makedirs(target_dir, exist_ok=True)

        self.info(f'Task = {self.task} child_task: {self.child_task}')

        traced_tmp = f'{log_dir}/traced.pth'
        traced = trace_model_from_checkpoint(log_dir, self)

        model = Model(
            dag=self.dag_pipe,
            interface=self.interface,
            slot=self.slot,
            score_local=task.score,
            created=now(),
            name=self.name,
            project=dag.project,
            interface_params=yaml_dump(self.interface_params),
        )
        provider = ModelProvider(self.session)
        # Not committed yet, so a failure below can be rolled back.
        provider.add(model, commit=False)
        try:
            traced_dst = f'{target_dir}/{model.name}.pth'
            weights_dst = f'{target_dir}/{model.name}_weight.pth'
            torch.jit.save(traced, traced_tmp)
            shutil.copy(traced_tmp, traced_dst)
            shutil.copy(f'{log_dir}/checkpoints/best.pth', weights_dst)

            params = yaml_load(model.interface_params)
            params['file'] = join('models', model.name + '.pth')
            model.interface_params = yaml_dump(params)
            provider.update()
        except Exception as err:
            provider.rollback()
            raise err
Ejemplo n.º 13
0
def models():
    """Return the result of ``ModelProvider.get`` for the request data.

    The ``paginator`` entry of the request data is expanded into
    ``PaginatorOptions``; the read session is used for the query.
    """
    payload = request_data()
    paging = PaginatorOptions(**payload['paginator'])
    return ModelProvider(_read_session).get(payload, paging)
Ejemplo n.º 14
0
def model_start_begin():
    """Return ``ModelProvider.model_start_begin`` for the requested model.

    The model id is taken from the ``model_id`` entry of the request data;
    the read session is used.
    """
    payload = request_data()
    provider = ModelProvider(_read_session)
    return provider.model_start_begin(payload['model_id'])