Beispiel #1
0
    def __init__(self,
                 estimator,
                 task_definition=None,
                 task_relationship_discovery=None,
                 task_mining=None,
                 task_remodeling=None,
                 inference_integrate=None,
                 unseen_task_detect=None):
        """
        Wrap `estimator` in a `MulTaskLearning` instance and set up the
        knowledge-base client used by the lifelong learning job.

        Parameters
        ----------
        estimator : object
            Base model handed to `MulTaskLearning`.
        task_definition : dict
            Falls back to ``{"method": "TaskDefinitionByDataAttr"}``
            when falsy.
        task_relationship_discovery, task_mining, task_remodeling,
        inference_integrate : dict
            Forwarded unchanged to `MulTaskLearning`.
        unseen_task_detect : dict
            Falls back to ``{"method": "TaskAttrFilter"}`` when falsy; its
            optional ``"param"`` entry is parsed with
            `MulTaskLearning._parse_param`.
        """
        task_definition = task_definition or {
            "method": "TaskDefinitionByDataAttr"}
        unseen_task_detect = unseen_task_detect or {
            "method": "TaskAttrFilter"}

        multi_task = MulTaskLearning(
            estimator=estimator,
            task_definition=task_definition,
            task_relationship_discovery=task_relationship_discovery,
            task_mining=task_mining,
            task_remodeling=task_remodeling,
            inference_integrate=inference_integrate)

        self.unseen_task_detect = unseen_task_detect.get(
            "method", "TaskAttrFilter")
        detect_param = unseen_task_detect.get("param", {})
        self.unseen_task_detect_param = multi_task._parse_param(detect_param)

        # Job configuration is read from the runtime context.
        kb_server_url = Context.get_parameters("KB_SERVER")
        output_url = Context.get_parameters("OUTPUT_URL", "/tmp")
        config = {
            "ll_kb_server": kb_server_url,
            "output_url": output_url,
            # The task index file sits directly under the output url.
            "task_index": FileOps.join_path(
                output_url, KBResourceConstant.KB_INDEX_NAME.value),
        }

        super(LifelongLearning, self).__init__(
            estimator=multi_task, config=config)
        self.job_kind = K8sResourceKind.LIFELONG_JOB.value
        self.kb_server = KBClient(kbserver=self.config.ll_kb_server)
Beispiel #2
0
 async def file_upload(self, file: UploadFile = File(...)):
     """
     Store an uploaded file under ``self.save_dir``.

     Parameters
     ----------
     file : UploadFile
         The uploaded file; its content is read fully into memory.

     Returns
     -------
     str
         Relative download url pointing at the stored file.
     """
     # Function-scope import so the block stays self-contained.
     import os

     payload = await file.read()
     # The filename is supplied by the client: keep only its last path
     # component so names such as "../../x" cannot escape ``save_dir``.
     filename = os.path.basename(str(file.filename))
     output = FileOps.join_path(self.save_dir, filename)
     with open(output, "wb") as fout:
         fout.write(payload)
     return f"/file/download?files={filename}&name={filename}"
Beispiel #3
0
    def save(self, model_url="", model_name=None):
        """
        Save the estimator locally and optionally upload the result.

        Parameters
        ----------
        model_url : str
            Upload destination; nothing is uploaded when empty.
        model_name : str
            File name to save under, defaults to ``self.model_name``.

        Returns
        -------
        str
            `model_url` when the saved file was uploaded, otherwise the
            local path of the saved model.
        """
        file_name = model_name or self.model_name
        if os.path.isfile(self.model_save_path):
            # The save path points at a file: split it into folder + name.
            folder, file_name = os.path.split(self.model_save_path)
            self.model_save_path = folder

        FileOps.clean_folder([self.model_save_path], clean=False)
        local_path = FileOps.join_path(self.model_save_path, file_name)
        self.estimator.save(local_path)

        if not (model_url and FileOps.exists(local_path)):
            return local_path
        FileOps.upload(local_path, model_url)
        return model_url
Beispiel #4
0
    def update_status(self, data: KBUpdateResult = Body(...)):
        """
        Set the deploy flag of the listed task groups and rewrite the
        knowledge-base index file accordingly.

        Parameters
        ----------
        data : KBUpdateResult
            ``data.tasks`` is a comma separated list of task group names,
            ``data.status`` the deploy state to apply (read as a boolean).

        Returns
        -------
        str
            Relative download url of the rewritten index file.
        """
        deploy = bool(data.status)
        tasks = data.tasks.split(",") if data.tasks else []
        with Session(bind=engine) as session:
            session.query(TaskGrp).filter(TaskGrp.name.in_(tasks)).update(
                {TaskGrp.deploy: deploy}, synchronize_session=False)
            # Leaving the ``with`` block only closes the session; without an
            # explicit commit the update above would be rolled back.
            session.commit()

        # todo: get from kb
        _index_path = FileOps.join_path(self.save_dir, self.kb_index)
        task_info = joblib.load(_index_path)

        task_groups = task_info["task_groups"]
        if task_groups:  # an empty index has no default group to fall back on
            default_task = task_groups[0]
            # todo: get from transfer learning
            # A group is kept when its membership in `tasks` matches the
            # requested deploy state; otherwise the first group replaces it.
            task_info["task_groups"] = [
                task_group
                if (task_group.entry in tasks) == deploy else default_task
                for task_group in task_groups
            ]
        FileOps.dump(task_info, _index_path)
        return f"/file/download?files={self.kb_index}&name={self.kb_index}"
Beispiel #5
0
 def load(self, model_url="", model_name=None, **kwargs):
     """
     Instantiate the estimator if needed and load a model into it.

     Parameters
     ----------
     model_url : str
         Location of the model; when given it is fetched with
         `FileOps.download` to the local model path.
     model_name : str
         Model file name, defaults to ``self.model_name``.
     kwargs : Dict
         Candidate constructor arguments, filtered by ``self.parse_kwargs``
         before a callable estimator is instantiated.

     Returns
     -------
     object
         Whatever ``estimator.load`` returns, or None when the estimator has
         no ``load`` attribute or the model path does not exist.
     """
     file_name = model_name or self.model_name
     if callable(self.estimator):
         init_kwargs = self.parse_kwargs(self.estimator, **kwargs)
         self.estimator = self.estimator(**init_kwargs)

     # An existing file (the url first, then the configured save path)
     # decides both the save folder and the file name.
     existing_file = None
     if model_url and os.path.isfile(model_url):
         existing_file = model_url
     elif os.path.isfile(self.model_save_path):
         existing_file = self.model_save_path
     if existing_file is not None:
         self.model_save_path, file_name = os.path.split(existing_file)

     local_path = FileOps.join_path(self.model_save_path, file_name)
     if model_url:
         local_path = FileOps.download(model_url, local_path)
     self.has_load = True

     loadable = (hasattr(self.estimator, "load")
                 and os.path.exists(local_path))
     return self.estimator.load(model_url=local_path) if loadable else None
Beispiel #6
0
 def run(self):
     """
     Poll the hot-update config and reload the production model whenever
     the configured model version changes.

     Loops while ``self.run_flag`` is truthy, sleeping ``self.check_time``
     seconds before each check. After a load attempt ``self.callback``
     (if set) is invoked with the resulting status.
     """
     while self.run_flag:
         time.sleep(self.check_time)
         conf = FileOps.download(self.hot_update_conf)
         if not (conf and FileOps.exists(conf)):
             continue
         try:
             with open(conf, "r") as fin:
                 conf_msg = json.load(fin)
             model_msg = conf_msg["model_config"]
             latest_version = str(model_msg["model_update_time"])
             model_source = model_msg["model_path"]
         except (json.JSONDecodeError, KeyError, TypeError):
             # TypeError covers valid JSON that is not a mapping (e.g. a
             # list), which would otherwise terminate the polling loop.
             LOGGER.error(f"fail to parse model hot update config: "
                          f"{self.hot_update_conf}")
             continue
         # Check the version first so an unchanged model is not downloaded
         # again on every poll.
         if latest_version == self.version:
             continue
         model = FileOps.download(
             model_source,
             FileOps.join_path(self.temp_path, f"model.{latest_version}"))
         if not (model and FileOps.exists(model)):
             continue
         # The version is recorded before loading, so a model that fails to
         # load is not retried until the config announces a newer version.
         self.version = latest_version
         with self.MODEL_MANIPULATION_SEM:
             LOGGER.info(f"Update model start with version {self.version}")
             try:
                 self.production_estimator.load(model)
                 status = K8sResourceKindStatus.COMPLETED.value
                 LOGGER.info(f"Update model complete "
                             f"with version {self.version}")
             except Exception as e:
                 LOGGER.error(f"fail to update model: {e}")
                 status = K8sResourceKindStatus.FAILED.value
             if self.callback:
                 self.callback(task_info=None, status=status, kind="deploy")
         gc.collect()
Beispiel #7
0
    def train(self,
              train_data,
              valid_data=None,
              post_process=None,
              action="initial",
              **kwargs):
        """
        Fit the estimator and update the knowledge base with the result.

        After training, every task model, the task samples and the task
        extractor are written under ``self.config.output_url`` and uploaded
        to the knowledge-base server; the resulting task index is stored at
        ``self.config.task_index``.

        Parameters
        ----------
        train_data : BaseDataSource
            Train data, see `sedna.datasources.BaseDataSource` for more detail.
        valid_data : BaseDataSource
            Valid data, BaseDataSource or None.
        post_process : function
            function or a registered method, callback after `estimator` train.
        action : str
            `update` or `initial` the knowledge base
            (not read by this method yet, see the todo below).
        kwargs : Dict
            parameters for `estimator` training, Like:
            `early_stopping_rounds` in Xgboost.XGBClassifier

        Returns
        -------
        train_history : object
            The training result, passed through `post_process` when given.
        """

        callback_func = None
        if post_process is not None:
            callback_func = ClassFactory.get_cls(ClassType.CALLBACK,
                                                 post_process)
        res, task_index_url = self.estimator.train(
            train_data=train_data, valid_data=valid_data, **kwargs
        )  # todo: Distinguishing incremental update and fully overwrite

        # The estimator returns either a path to the task index or the
        # index object itself.
        if isinstance(task_index_url, str) and FileOps.exists(task_index_url):
            task_index = FileOps.load(task_index_url)
        else:
            task_index = task_index_url

        extractor = task_index['extractor']
        task_groups = task_index['task_groups']

        # Remembers already uploaded model files so that a model shared by
        # several task groups is uploaded only once.
        model_upload_key = {}
        for task in task_groups:
            model_file = task.model.model
            save_model = FileOps.join_path(self.config.output_url,
                                           os.path.basename(model_file))
            if model_file not in model_upload_key:
                model_upload_key[model_file] = FileOps.upload(
                    model_file, save_model)
            model_file = model_upload_key[model_file]

            try:
                model = self.kb_server.upload_file(save_model)
            except Exception as err:
                # Best effort: fall back to a locally loaded model when the
                # knowledge-base upload fails.
                self.log.error(
                    f"Upload task model of {model_file} fail: {err}")
                model = set_backend(
                    estimator=self.estimator.estimator.base_model)
                model.load(model_file)
            task.model.model = model

            for _task in task.tasks:
                sample_dir = FileOps.join_path(
                    self.config.output_url,
                    f"{_task.samples.data_type}_{_task.entry}.sample")
                # NOTE(review): this saves the group's samples
                # (`task.samples`) under a per-task file name;
                # `_task.samples` may have been intended - confirm.
                task.samples.save(sample_dir)
                try:
                    sample_dir = self.kb_server.upload_file(sample_dir)
                except Exception as err:
                    self.log.error(
                        f"Upload task samples of {_task.entry} fail: {err}")
                _task.samples.data_url = sample_dir

        save_extractor = FileOps.join_path(
            self.config.output_url,
            KBResourceConstant.TASK_EXTRACTOR_NAME.value)
        extractor = FileOps.dump(extractor, save_extractor)
        try:
            extractor = self.kb_server.upload_file(extractor)
        except Exception as err:
            self.log.error(f"Upload task extractor fail: {err}")
        task_info = {"task_groups": task_groups, "extractor": extractor}
        fd, name = tempfile.mkstemp()
        # Only the path is needed below; close the descriptor right away so
        # it does not leak on every call.
        os.close(fd)
        FileOps.dump(task_info, name)

        index_file = self.kb_server.update_db(name)
        if not index_file:
            self.log.error("KB update Fail !")
            index_file = name
        FileOps.upload(index_file, self.config.task_index)

        task_info_res = self.estimator.model_info(
            self.config.task_index, relpath=self.config.data_path_prefix)
        self.report_task_info(None, K8sResourceKindStatus.COMPLETED.value,
                              task_info_res)
        self.log.info(f"Lifelong learning Train task Finished, "
                      f"KB idnex save in {self.config.task_index}")
        return callback_func(self.estimator, res) if callback_func else res
Beispiel #8
0
    def update(self, task: UploadFile = File(...)):
        """
        Register an uploaded task index in the database and store it as the
        knowledge-base index file.

        Parameters
        ----------
        task : UploadFile
            Serialized task info providing a ``"task_groups"`` entry.

        Returns
        -------
        str
            Relative download url of the stored index file.
        """
        tasks = task.file.read()
        fd, name = tempfile.mkstemp()
        try:
            # fdopen takes ownership of the descriptor, so it is closed even
            # when the write fails.
            with os.fdopen(fd, "wb") as fout:
                fout.write(tasks)
            # NOTE(review): joblib.load unpickles the uploaded payload;
            # unpickling data from an untrusted client can execute arbitrary
            # code - confirm that only trusted workers reach this endpoint.
            upload_info = joblib.load(name)
        finally:
            # The temporary copy is not needed once loaded (or on failure).
            os.remove(name)

        with Session(bind=engine) as session:
            for task_group in upload_info["task_groups"]:
                grp, g_create = get_or_create(session=session,
                                              model=TaskGrp,
                                              name=task_group.entry)
                if g_create:
                    grp.sample_num = 0
                    grp.task_num = 0
                    session.add(grp)
                grp.sample_num += len(task_group.samples)
                grp.task_num += len(task_group.tasks)
                t_id = []
                # `sub_task` avoids shadowing the `task` parameter.
                for sub_task in task_group.tasks:
                    t_obj, t_create = get_or_create(session=session,
                                                    model=Tasks,
                                                    name=sub_task.entry)
                    if sub_task.meta_attr:
                        t_obj.task_attr = json.dumps(sub_task.meta_attr)
                    if t_create:
                        session.add(t_obj)

                    # NOTE(review): `data_url` is read from the task object
                    # itself; the url may live on `sub_task.samples`
                    # instead - confirm against the uploader.
                    sample_obj = Samples(data_type=sub_task.samples.data_type,
                                         sample_num=len(sub_task.samples),
                                         data_url=getattr(
                                             sub_task, 'data_url', ''))
                    session.add(sample_obj)

                    session.flush()
                    session.commit()
                    tsample = TaskSample(sample=sample_obj, task=t_obj)
                    session.add(tsample)
                    session.flush()
                    t_id.append(t_obj.id)

                model_obj, m_create = get_or_create(session=session,
                                                    model=TaskModel,
                                                    task=grp)
                model_obj.model_url = task_group.model.model
                model_obj.is_current = False
                if m_create:
                    session.add(model_obj)
                session.flush()
                session.commit()
                if not grp.task_num:
                    # A group without tasks has no ratio to distribute;
                    # skip it instead of dividing by zero.
                    continue
                transfer_radio = 1 / grp.task_num
                for t in t_id:
                    t_obj, t_create = get_or_create(session=session,
                                                    model=TaskRelation,
                                                    task_id=t,
                                                    grp=grp)
                    t_obj.transfer_radio = transfer_radio
                    if t_create:
                        session.add(t_obj)
                        session.flush()
                    session.commit()
                # Relations of this group created earlier get the new ratio
                # as well.
                session.query(TaskRelation).filter(
                    TaskRelation.grp == grp).update(
                        {"transfer_radio": transfer_radio})

            session.commit()

        # todo: get from kb
        _index_path = FileOps.join_path(self.save_dir, self.kb_index)
        FileOps.dump(upload_info, _index_path)

        return f"/file/download?files={self.kb_index}&name={self.kb_index}"
Beispiel #9
0
 async def file_download(self, files: str, name: str = ""):
     """
     Serve a file stored under ``self.save_dir``.

     Parameters
     ----------
     files : str
         Path of the requested file, relative to ``self.save_dir``.
     name : str
         Name forwarded to ``self._file_endpoint``.
     """
     # NOTE(review): `files` is joined without sanitising; confirm that
     # `_file_endpoint` / `FileOps.join_path` reject paths such as "../x".
     target = FileOps.join_path(self.save_dir, files)
     return self._file_endpoint(target, name=name)
Beispiel #10
0
 def _get_db_index(self):
     """Return the path of the knowledge-base index under ``save_dir``.

     The path is returned whether or not the file exists.
     """
     index_file = FileOps.join_path(self.save_dir, self.kb_index)
     if FileOps.exists(index_file):
         return index_file
     # todo: get from kb
     return index_file
Beispiel #11
0
 def load_weights(self):
     """Load weights into the estimator from the saved model file.

     Does nothing when the file ``model_save_path/model_name`` is missing.
     """
     weights_file = FileOps.join_path(self.model_save_path, self.model_name)
     if not os.path.exists(weights_file):
         return
     self.estimator.load_weights(weights_file)
Beispiel #12
0
 def model_path(self):
     """Resolve the path of the model file.

     Order of precedence: ``config.model_url`` when it is an existing file,
     then the ``model_path`` parameter when truthy, finally the estimator's
     model name joined onto ``config.model_url``.
     """
     configured_url = self.config.model_url
     if os.path.isfile(configured_url):
         return configured_url
     explicit_path = self.get_parameters('model_path')
     if explicit_path:
         return explicit_path
     return FileOps.join_path(self.config.model_url,
                              self.estimator.model_name)