예제 #1
0
def _checkout_response_json(response):
    """Decode a JSON response body and return its ``data`` member.

    The text in ``response.text`` is parsed with ``util.loads``; the parsed
    object is indexed by ``code`` and, on success, by ``data``.

    Args:
        response: Object exposing the raw body as ``response.text``.

    Returns:
        The value stored under ``data`` when ``code`` equals 0.

    Raises:
        Exception: When ``code`` is anything other than 0. The message
            embeds the raw response text.
    """
    payload = util.loads(response.text)
    if payload["code"] == 0:
        return payload['data']
    raise Exception(f"Update failed, {response.text}")
예제 #2
0
    def get(self, dataset_name, model_name, batch_predict_job_name, *args, **kwargs):
        """Respond with the stored step messages of a batch predict job.

        Messages whose author equals ``batch_predict_job_name`` are fetched
        in ascending ``create_datetime`` order and each message's content is
        decoded with ``util.loads``. ``dataset_name`` and ``model_name`` are
        accepted but not used by this method body.
        """
        # TODO: move this query into the service layer.
        with db.open_session() as session:
            by_author = session.query(MessageEntity).filter(
                MessageEntity.author == batch_predict_job_name)
            oldest_first = by_author.order_by(
                MessageEntity.create_datetime.asc())
            # Decode the contents while the session is still open.
            steps = [util.loads(message.content)
                     for message in oldest_first.all()]

        self.response_json({
            "batch_predict_job_name": batch_predict_job_name,
            "steps": steps,
        })
예제 #3
0
    def delete(self, dataset_name, *args, **kwargs):
        """Respond with the parsed ``meta.json`` of the named dataset.

        NOTE(review): despite the method name, the code visible here only
        reads the dataset's metadata file and returns it; nothing is removed.
        Confirm whether the deletion itself happens elsewhere.

        Raises:
            IllegalParamException: ``dataset_name`` is None.
            EntityNotExistsException: No directory for the dataset exists
                under ``consts.PATH_DATASET``.
            ValueError: The dataset directory has no ``meta.json``.
        """
        # Guard: a dataset name is mandatory.
        if dataset_name is None:
            raise IllegalParamException("dataset_name", None, "not empty")

        # Guard: the dataset directory must be present.
        dataset_dir = P.join(consts.PATH_DATASET, dataset_name)
        if not P.exists(dataset_dir):
            raise EntityNotExistsException(
                EntityNotExistsException.Entities.Dataset, dataset_name)

        # Guard: a dataset without its metadata file is treated as broken.
        meta_path = P.join(dataset_dir, 'meta.json')
        if not P.exists(meta_path):
            raise ValueError(f"Dataset={dataset_name} is broken.")

        with open(meta_path, 'r') as meta_fh:
            raw_meta = meta_fh.read()

        self.response_json(util.loads(raw_meta))
예제 #4
0
    def get(self, dataset_name, analyze_job_name, *args, **kwargs):
        """Respond with the stored step messages of an analyze job.

        Messages whose author equals ``analyze_job_name`` are fetched in
        ascending ``create_datetime`` order and each message's content is
        decoded with ``util.loads``. ``dataset_name`` is accepted but not
        used by this method body.

        Raises:
            IllegalParamException: ``analyze_job_name`` is None.
        """
        if analyze_job_name is None:
            raise IllegalParamException("analyze_job_name", None, "not empty")

        # TODO: move this query into the service layer.
        with db.open_session() as session:
            by_author = session.query(MessageEntity).filter(
                MessageEntity.author == analyze_job_name)
            oldest_first = by_author.order_by(
                MessageEntity.create_datetime.asc())
            # Decode the contents while the session is still open.
            steps = [util.loads(message.content)
                     for message in oldest_first.all()]

        self.response_json({
            "analyze_job_name": analyze_job_name,
            "steps": steps,
        })
예제 #5
0
    def add_predict_process_step(self, model_name: str, job_name: str,
                                 step: JobStep):
        """Store one step message for a predict job.

        A step type may be recorded only once per job: the existing messages
        authored by ``job_name`` are decoded and compared against
        ``step.type`` before the new message is added to the session.

        Raises:
            Exception: A message with the same step type already exists for
                ``job_name``.
        """
        incoming_type = step.type
        with db.open_session() as session:
            # The result is not needed; the call is kept for its lookup.
            # Presumably it raises when the model is missing — confirm
            # against the DAO.
            self.model_dao.require_by_name(session, model_name)

            # One record per step type.
            recorded = session.query(MessageEntity).filter(
                MessageEntity.author == job_name).all()
            already_recorded = any(
                incoming_type == util.loads(entry.content).get('type')
                for entry in recorded)
            if already_recorded:
                raise Exception(
                    f"Event type = {incoming_type} already exists .")

            # Persist the step as a new message.
            serialized_step = util.dumps(step.to_dict())
            session.add(
                MessageEntity(id=util.short_uuid(),
                              author=job_name,
                              content=serialized_step,
                              create_datetime=util.get_now_datetime()))
예제 #6
0
    def add_train_process_step(self, train_job_name, req_dict):
        """Record a reported training step and update the model accordingly.

        ``req_dict`` must provide string entries ``type`` and ``status``; an
        ``extension`` dict is read with ``util.get_from_dict``. The request
        is stored as a message authored by ``train_job_name``, then the model
        found for that train job is updated with fields that depend on the
        step type and status.

        Raises:
            ValueError: The step type or the status is not a known value.
            Exception: The step type was already recorded for this job.
                ``OptimizeStart`` and ``Optimize`` are exempt and may repeat.
        """
        # [1] Read and validate the request fields.
        step_type = util.require_in_dict(req_dict, 'type', str)
        step_status = util.require_in_dict(req_dict, 'status', str)
        step_extension = util.get_from_dict(req_dict, 'extension', dict)

        known_types = (
            TrainStep.Types.Load,
            TrainStep.Types.Optimize,
            TrainStep.Types.OptimizeStart,
            TrainStep.Types.Persist,
            TrainStep.Types.Evaluate,
            TrainStep.Types.FinalTrain,
            TrainStep.Types.Searched,
        )
        if step_type not in known_types:
            raise ValueError(f"Unknown step type = {step_type}")

        if step_status not in (JobStep.Status.Succeed, JobStep.Status.Failed):
            raise ValueError(f"Unknown status = {step_status}")

        succeeded = step_status == JobStep.Status.Succeed
        # Step types that are allowed to be reported more than once.
        repeatable_types = (TrainStep.Types.OptimizeStart,
                            TrainStep.Types.Optimize)

        def failure_fields():
            # Built on demand so the finish time is taken at update time.
            return {"status": ModelStatusType.Failed,
                    "finish_datetime": util.get_now_datetime()}

        # [2] Store the message and update the model in one session.
        with db.open_session() as session:
            # [2.1] Look up the model that belongs to this train job.
            model = self.model_dao.find_by_train_job_name(session,
                                                          train_job_name)
            model_name = model.name

            # [2.2] One record per step type, except the repeatable ones.
            recorded = session.query(MessageEntity).filter(
                MessageEntity.author == train_job_name).all()
            for entry in recorded:
                recorded_type = util.loads(entry.content).get('type')
                if step_type == recorded_type and step_type not in repeatable_types:
                    raise Exception(
                        f"Event type = {step_type} already exists .")

            # [2.3] Persist the request as a new message.
            serialized_request = util.dumps(req_dict)
            session.add(
                MessageEntity(id=util.short_uuid(),
                              author=train_job_name,
                              content=serialized_request,
                              create_datetime=util.get_now_datetime()))

            # [2.4] Work out which model fields this step changes.
            # TODO: validate the progress change here, e.g.
            # self._check_progress_change(step_type, current_progress),
            # including the failed status.
            current_progress = model.progress

            if step_type == TrainStep.Types.Evaluate:
                if succeeded:
                    fields = {"performance": step_extension['performance']}
                else:
                    fields = failure_fields()

            elif step_type == TrainStep.Types.Load:
                if succeeded:
                    fields = {"status": ModelStatusType.Running}
                else:
                    fields = failure_fields()

            elif step_type == TrainStep.Types.OptimizeStart:
                # Only the message is stored; the model is left untouched.
                fields = None

            elif step_type == TrainStep.Types.Optimize:
                trail_no = step_extension.get('trail_no')
                # Append this trail to the ones already kept on the model.
                trails = model.trails
                if trails is None:
                    trails = []
                trails.append(step_extension)
                fields = {"train_trail_no": trail_no,
                          "score": step_extension.get('reward'),
                          "trails": trails}

            elif step_type == TrainStep.Types.Persist:
                fields = {"model_file_size": step_extension['model_file_size'],
                          "status": ModelStatusType.Succeed,
                          "finish_datetime": util.get_now_datetime()}

            else:
                fields = {}

            if fields is not None:
                self._update_model(session, model_name, step_type, fields)
예제 #7
0
    def add_analyze_process_step(self, dataset_name, analyze_job_name,
                                 step: JobStep):
        """Record an analyze-job step and apply its result to the dataset.

        The work is split over two sessions: the first validates that the
        dataset exists and that this step type has not been recorded yet for
        ``analyze_job_name``; the second stores the step as a message and
        then updates the dataset according to the step type.

        * ``AnalyzeStep.Types.Analyzed``: on success the dataset statistics
          from ``step.extension`` are written to the dataset; otherwise the
          dataset status is set to failed.
        * ``AnalyzeStep.Types.PatchCorrelation``: the per-feature correlation
          values from ``step.extension['corr']`` are attached to the dataset
          features, which are then stored sorted by absolute correlation.

        Raises:
            EntityNotExistsException: No dataset named ``dataset_name``.
            Exception: The step type was already recorded for this job.
            ValueError: For a correlation patch, the dataset is not in the
                analyzed status or the label column does not match.
        """
        step_type = step.type
        with db.open_session() as s:
            # 1.1. check dataset exists
            d = s.query(DatasetEntity).filter(
                DatasetEntity.name == dataset_name).first()
            if d is None:
                raise EntityNotExistsException(DatasetEntity, dataset_name)

            # 1.2. check event type, one type one record
            messages = s.query(MessageEntity).filter(
                MessageEntity.author == analyze_job_name).all()
            if any(step_type == util.loads(m.content).get('type')
                   for m in messages):
                raise Exception(
                    f"Event type = {step_type} already exists .")

        # 2. handle event
        with db.open_session() as s:
            # 2.1. create a new message
            content = util.dumps(step.to_dict())
            message = MessageEntity(id=util.short_uuid(),
                                    author=analyze_job_name,
                                    content=content,
                                    create_datetime=util.get_now_datetime())
            s.add(message)

            # 2.2. handle analyze event
            if step_type == AnalyzeStep.Types.Analyzed:
                # update temporary dataset
                # todo handle failed analyze
                if step.status == JobStep.Status.Succeed:
                    # "hints" is removed from the extension before the rest
                    # of it is loaded as dataset statistics.
                    hints = step.extension.pop("hints")
                    d_stats = DatasetStats.load_dict(step.extension)

                    feature_dicts = [f.to_dict() for f in d_stats.features]
                    update_fields = {
                        "has_header": d_stats.has_header,
                        "extension": step.extension,
                        "n_cols": d_stats.n_cols,
                        "n_rows": d_stats.n_rows,
                        "features": feature_dicts,
                        "hints": hints,
                        "feature_summary": d_stats.feature_summary.to_dict(),
                        "status": DatasetEntity.Status.Analyzed,
                    }
                else:
                    update_fields = {"status": DatasetEntity.Status.Failed}
                self.dataset_dao.update_by_name(s, dataset_name, update_fields)

            elif step_type == AnalyzeStep.Types.PatchCorrelation:
                # 1. check dataset status, only analyzed can calc relativity.
                # Compare against the dataset status constant (the value
                # written by the Analyzed branch), not the step type.
                dataset = self.dataset_dao.require_by_name(s, dataset_name)
                analyzed_status = DatasetEntity.Status.Analyzed
                if dataset.status != analyzed_status:
                    raise ValueError(
                        f"Dataset {dataset_name} status is not {analyzed_status} ."
                    )

                request_label_col = step.extension.get("label_col")
                if request_label_col != dataset.label_col:
                    raise ValueError(
                        f"Dataset {dataset_name} label col is {dataset.label_col} but received result is for {request_label_col}"
                    )

                # 2. read extension
                corr_dict = step.extension.get('corr')

                # 3. load & update features
                features = dataset.to_dataset_stats().features
                for f in features:
                    # May be None when the result has no entry for f.name.
                    correlation = corr_dict.get(f.name)
                    f.correlation = FeatureCorrelation(
                        value=correlation,
                        status=FeatureCorrelation.calc_status(
                            correlation, request_label_col == f.name))

                # 4. sort features by abs correlation, strongest first.
                def abs_correlation_key(feature):
                    value = feature.correlation.value
                    if value is None:
                        # abs(None) would raise TypeError; with reverse=True
                        # this key places features without a value last.
                        return (False, 0.0)
                    return (True, abs(value))

                features = sorted(features,
                                  key=abs_correlation_key,
                                  reverse=True)

                feature_dict_list = [f.to_dict() for f in features]

                # 5. push back database
                self.dataset_dao.update_by_name(
                    s, dataset_name, {"features": feature_dict_list})
예제 #8
0
 def assert_response_and_get(self, response):
     """Check that a response succeeded and return its ``data`` payload.

     The HTTP status in ``response.code`` must be 200 and the body, decoded
     with ``util.loads``, must carry ``code`` equal to 0.

     Returns:
         The value stored under ``data`` in the decoded body.

     Raises:
         AssertionError: Either check fails (raised by ``assertEqual``).
     """
     self.assertEqual(response.code, 200)
     response_body = util.loads(response.body)
     # A bare ``assert`` is stripped under ``python -O`` and reports no
     # values on failure; use the same assertion helper as the status check.
     self.assertEqual(response_body['code'], 0)
     return response_body['data']