def insert_metrics_into_db(self, metric_namespace: str, metric_name: str, data_type: int, kv, job_level=False):
    try:
        tracking_metric = self.get_dynamic_db_model(TrackingMetric, self.job_id)()
        tracking_metric.f_job_id = self.job_id
        tracking_metric.f_component_name = (self.component_name if not job_level
                                            else job_utils.job_virtual_component_name())
        tracking_metric.f_task_id = self.task_id
        tracking_metric.f_task_version = self.task_version
        tracking_metric.f_role = self.role
        tracking_metric.f_party_id = self.party_id
        tracking_metric.f_metric_namespace = metric_namespace
        tracking_metric.f_metric_name = metric_name
        tracking_metric.f_type = data_type
        # Use one populated row as a template, then copy it per (key, value) pair
        default_db_source = tracking_metric.to_json()
        tracking_metric_data_source = []
        for k, v in kv:
            db_source = default_db_source.copy()
            db_source['f_key'] = serialize_b64(k)
            db_source['f_value'] = serialize_b64(v)
            db_source['f_create_time'] = current_timestamp()
            tracking_metric_data_source.append(db_source)
        self.bulk_insert_into_db(self.get_dynamic_db_model(TrackingMetric, self.job_id),
                                 tracking_metric_data_source)
    except Exception as e:
        schedule_logger(self.job_id).exception(
            "Failed to insert metric {} of metric namespace {} into database:\n{}".format(
                metric_name, metric_namespace, e))
def insert_summary_into_db(self, summary_data: dict):
    try:
        summary_model = self.get_dynamic_db_model(ComponentSummary, self.job_id)
        DB.create_tables([summary_model])
        summary_obj = summary_model.get_or_none(
            summary_model.f_job_id == self.job_id,
            summary_model.f_component_name == self.component_name,
            summary_model.f_role == self.role,
            summary_model.f_party_id == self.party_id,
            summary_model.f_task_id == self.task_id,
            summary_model.f_task_version == self.task_version
        )
        if summary_obj:
            summary_obj.f_summary = serialize_b64(summary_data, to_str=True)
            summary_obj.f_update_time = current_timestamp()
            summary_obj.save()
        else:
            self.get_dynamic_db_model(ComponentSummary, self.job_id).create(
                f_job_id=self.job_id,
                f_component_name=self.component_name,
                f_role=self.role,
                f_party_id=self.party_id,
                f_task_id=self.task_id,
                f_task_version=self.task_version,
                f_summary=serialize_b64(summary_data, to_str=True),
                f_create_time=current_timestamp()
            )
    except Exception as e:
        schedule_logger(self.job_id).exception(
            "Failed to save summary for job {} component {} to database:\n{}".format(
                self.job_id, self.component_name, e))
def get_table_meta(self, table_info):
    schedule_logger(self.job_id).info(f'start getting table meta: {table_info}')
    table_meta_dict = storage.StorageTableMeta(
        namespace=table_info.get("namespace"),
        name=table_info.get("table_name"),
        create_address=False
    ).to_dict()
    schedule_logger(self.job_id).info(f'got table meta successfully: {table_meta_dict}')
    # part_of_data and schema may hold arbitrary Python objects, so serialize them for transport
    table_meta_dict["part_of_data"] = serialize_b64(table_meta_dict["part_of_data"], to_str=True)
    table_meta_dict["schema"] = serialize_b64(table_meta_dict["schema"], to_str=True)
    return table_meta_dict
def log_metric_data_common(self, metric_namespace: str, metric_name: str,
                           metrics: List[typing.Union[Metric, dict]], job_level=False):
    LOGGER.info("Request to save job {} task {} {} on {} {} metric {} {} data".format(
        self.job_id, self.task_id, self.task_version, self.role, self.party_id,
        metric_namespace, metric_name))
    request_body = {
        'metric_namespace': metric_namespace,
        'metric_name': metric_name,
        # Accept either Metric objects or plain dicts; normalize to Metric before serializing
        'metrics': [serialize_b64(metric if isinstance(metric, Metric) else Metric.from_dict(metric),
                                  to_str=True) for metric in metrics],
        'job_level': job_level,
    }
    response = api_utils.local_api(
        job_id=self.job_id,
        method='POST',
        endpoint='/tracker/{}/{}/{}/{}/{}/{}/metric_data/save'.format(
            self.job_id, self.component_name, self.task_id, self.task_version,
            self.role, self.party_id),
        json_body=request_body)
    if response['retcode'] != RetCode.SUCCESS:
        raise Exception(f"log metric (namespace: {metric_namespace}, name: {metric_name}) data error, "
                        f"response code: {response['retcode']}, msg: {response['retmsg']}")
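# A minimal usage sketch for log_metric_data_common. The names `tracker` and the
# loss values are hypothetical, and it assumes Metric is constructed from a
# (key, value) pair — an assumption based on typical FATE tracker usage, not
# confirmed by this snippet alone:
def log_loss_curve_example(tracker, losses=(0.90, 0.52, 0.31)):
    tracker.log_metric_data_common(
        metric_namespace="train",
        metric_name="loss",
        metrics=[Metric(epoch, loss) for epoch, loss in enumerate(losses)],
    )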
def db_value(self, value):
    if self._serialized_type == SerializedType.PICKLE:
        return serialize_b64(value, to_str=True)
    elif self._serialized_type == SerializedType.JSON:
        if value is None:
            return None
        return json_dumps(value, with_type=True)
    else:
        raise ValueError(f"the serialized type {self._serialized_type} is not supported")
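# A peewee field that overrides db_value usually also overrides python_value to
# reverse the transformation on read. A minimal sketch of that counterpart,
# assuming deserialize_b64 and json_loads helpers exist alongside serialize_b64
# and json_dumps (an assumption — adapt to the actual helpers in this codebase):
def python_value(self, value):
    if self._serialized_type == SerializedType.PICKLE:
        return deserialize_b64(value)
    elif self._serialized_type == SerializedType.JSON:
        if value is None:
            return None
        return json_loads(value)
    else:
        raise ValueError(f"the serialized type {self._serialized_type} is not supported")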
def create_table_meta(self, table_meta):
    request_body = dict()
    for k, v in table_meta.to_dict().items():
        if k in ("part_of_data", "schema"):
            # Arbitrary Python objects: base64-serialize for the JSON body
            request_body[k] = serialize_b64(v, to_str=True)
        elif issubclass(type(v), AddressABC):
            request_body[k] = v.__dict__
        else:
            request_body[k] = v
    response = api_utils.local_api(
        job_id=self.job_id,
        method='POST',
        endpoint='/tracker/{}/{}/{}/{}/{}/{}/table_meta/create'.format(
            self.job_id, self.component_name, self.task_id, self.task_version,
            self.role, self.party_id),
        json_body=request_body)
    if response['retcode'] != RetCode.SUCCESS:
        raise Exception(f"create table meta failed: {response['retmsg']}")
def store(self, model_id: str, model_version: str, store_address: dict, force_update: bool = False):
    """
    Store the model from the local cache to MySQL.
    :param model_id: id of the model
    :param model_version: version of the model
    :param store_address: database connection configuration
    :param force_update: update existing slices in place instead of inserting
    :return:
    """
    try:
        self.get_connection(config=store_address)
        DB.create_tables([MachineLearningModel])
        model = PipelinedModel(model_id=model_id, model_version=model_version)
        LOGGER.info("start to store model {} {}".format(model_id, model_version))
        with DB.connection_context():
            with open(model.packaging_model(), "rb") as fr:
                slice_index = 0
                while True:
                    content = fr.read(SLICE_MAX_SIZE)
                    if content:
                        model_in_table = MachineLearningModel()
                        model_in_table.f_create_time = current_timestamp()
                        model_in_table.f_model_id = model_id
                        model_in_table.f_model_version = model_version
                        model_in_table.f_content = serialize_b64(content, to_str=True)
                        model_in_table.f_size = sys.getsizeof(model_in_table.f_content)
                        model_in_table.f_slice_index = slice_index
                        if force_update:
                            model_in_table.save(only=[MachineLearningModel.f_content,
                                                      MachineLearningModel.f_size,
                                                      MachineLearningModel.f_update_time,
                                                      MachineLearningModel.f_slice_index])
                            LOGGER.info("update model {} {} slice index {} content".format(
                                model_id, model_version, slice_index))
                        else:
                            model_in_table.save(force_insert=True)
                        slice_index += 1
                        LOGGER.info("insert model {} {} slice index {} content".format(
                            model_id, model_version, slice_index))
                    else:
                        break
        LOGGER.info("Store model {} {} to mysql successfully".format(model_id, model_version))
        self.close_connection()
    except Exception as e:
        LOGGER.exception(e)
        raise Exception("Store model {} {} to mysql failed".format(model_id, model_version))
def set_metric_meta_common(self, metric_namespace: str, metric_name: str,
                           metric_meta: MetricMeta, job_level=False):
    LOGGER.info("Request to save job {} task {} {} on {} {} metric {} {} meta".format(
        self.job_id, self.task_id, self.task_version, self.role, self.party_id,
        metric_namespace, metric_name))
    request_body = dict()
    request_body['metric_namespace'] = metric_namespace
    request_body['metric_name'] = metric_name
    request_body['metric_meta'] = serialize_b64(metric_meta, to_str=True)
    request_body['job_level'] = job_level
    response = api_utils.local_api(
        job_id=self.job_id,
        method='POST',
        endpoint='/tracker/{}/{}/{}/{}/{}/{}/metric_meta/save'.format(
            self.job_id, self.component_name, self.task_id, self.task_version,
            self.role, self.party_id),
        json_body=request_body)
    return response['retcode'] == RetCode.SUCCESS
def db_value(self, value):
    return serialize_b64(value, to_str=True)
def store(self, model_id: str, model_version: str, store_address: dict, force_update: bool = False):
    """
    Store the model from the local cache to MySQL.
    :param model_id: id of the model
    :param model_version: version of the model
    :param store_address: database connection configuration
    :param force_update: overwrite existing slices instead of failing
    :return:
    """
    if not force_update and self.exists(model_id, model_version, store_address):
        raise FileExistsError(f"The model {model_id} {model_version} already exists in the database.")

    model = PipelinedModel(model_id, model_version)
    self.get_connection(store_address)

    try:
        DB.create_tables([MachineLearningModel])
        LOGGER.info(f"Starting to store model {model_id} {model_version}.")

        with open(model.packaging_model(), "rb") as fr, DB.connection_context():
            slice_index = 0
            while True:
                # Read and store the packaged model file in fixed-size slices
                content = fr.read(SLICE_MAX_SIZE)
                if not content:
                    break

                model_in_table = MachineLearningModel()
                model_in_table.f_create_time = current_timestamp()
                model_in_table.f_model_id = model_id
                model_in_table.f_model_version = model_version
                model_in_table.f_content = serialize_b64(content, to_str=True)
                model_in_table.f_size = sys.getsizeof(model_in_table.f_content)
                model_in_table.f_slice_index = slice_index

                # Try an update first when forcing; fall back to insert if no row matched
                rows = 0
                if force_update:
                    rows = model_in_table.save(only=[
                        MachineLearningModel.f_content,
                        MachineLearningModel.f_size,
                        MachineLearningModel.f_update_time,
                        MachineLearningModel.f_slice_index,
                    ])
                if not rows:
                    rows = model_in_table.save(force_insert=True)
                if not rows:
                    raise Exception(f"Save slice index {slice_index} failed")

                LOGGER.info(f"Saved slice index {slice_index} of model {model_id} {model_version}.")
                slice_index += 1
    except Exception as e:
        LOGGER.exception(e)
        raise Exception(f"Store model {model_id} {model_version} to mysql failed.")
    else:
        LOGGER.info(f"Store model {model_id} {model_version} to mysql successfully.")
    finally:
        self.close_connection()
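# Reading a stored model back is the inverse of store: select the slices in
# f_slice_index order, reverse serialize_b64 on each, and concatenate the bytes.
# A minimal sketch with a hypothetical helper name (restore_content), assuming a
# deserialize_b64 counterpart exists:
def restore_content(model_id: str, model_version: str) -> bytes:
    query = (MachineLearningModel
             .select()
             .where(MachineLearningModel.f_model_id == model_id,
                    MachineLearningModel.f_model_version == model_version)
             .order_by(MachineLearningModel.f_slice_index))
    return b"".join(deserialize_b64(row.f_content) for row in query)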