def read_checkpoint(self):
    checkpoint_manager = CheckpointManager(
        role=self.tracker.role,
        party_id=self.tracker.party_id,
        model_id=self.model_id,
        model_version=self.model_version,
        component_name=self.component_name,
        mkdir=False,
    )
    checkpoint_manager.load_checkpoints_from_disk()

    # selection priority: explicit step_index, then step_name, then the latest checkpoint
    if self.step_index is not None:
        checkpoint = checkpoint_manager.get_checkpoint_by_index(self.step_index)
    elif self.step_name is not None:
        checkpoint = checkpoint_manager.get_checkpoint_by_name(self.step_name)
    else:
        checkpoint = checkpoint_manager.latest_checkpoint

    if checkpoint is None:
        raise ValueError('The checkpoint was not found.')

    data = checkpoint.read(include_database=True)
    data['model_id'] = checkpoint_manager.model_id
    data['model_version'] = checkpoint_manager.model_version
    data['component_name'] = checkpoint_manager.component_name

    # expose the models as component output and record the rest as checkpoint metadata
    self.model_output = data.pop('models')
    self.tracker.set_metric_meta(
        'model_loader', f'{checkpoint.step_index}-{checkpoint.step_name}',
        MetricMeta('checkpoint', 'checkpoint_info', data))
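
# Illustrative sketch (not part of the FATE API): the fallback chain above resolves a
# checkpoint by step_index first, then step_name, then the latest one. A minimal
# in-memory manager with hypothetical get_by_index/get_by_name helpers shows the same
# selection logic in isolation:
class _DemoCheckpointManager:
    def __init__(self, checkpoints):
        self.checkpoints = checkpoints  # list of (step_index, step_name) tuples

    def get_by_index(self, index):
        return next((c for c in self.checkpoints if c[0] == index), None)

    def get_by_name(self, name):
        return next((c for c in self.checkpoints if c[1] == name), None)

    @property
    def latest(self):
        return self.checkpoints[-1] if self.checkpoints else None


def _select_checkpoint(manager, step_index=None, step_name=None):
    if step_index is not None:
        return manager.get_by_index(step_index)
    if step_name is not None:
        return manager.get_by_name(step_name)
    return manager.latest

# e.g. _select_checkpoint(_DemoCheckpointManager([(0, 'init'), (1, 'iter_1')])) -> (1, 'iter_1')
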
def callback_metric(self, metric_name, metric_namespace, metric_data):
    self.tracker.log_metric_data(
        metric_name=metric_name,
        metric_namespace=metric_namespace,
        metrics=metric_data,
    )
    self.tracker.set_metric_meta(
        metric_namespace,
        metric_name,
        MetricMeta(name="download", metric_type="DOWNLOAD"),
    )
def save_meta(self, dst_table_namespace, dst_table_name, table_count):
    self.tracker.log_output_data_info(
        data_name="upload",
        table_namespace=dst_table_namespace,
        table_name=dst_table_name,
    )
    self.tracker.log_metric_data(
        metric_namespace="upload",
        metric_name="data_access",
        metrics=[Metric("count", table_count)],
    )
    self.tracker.set_metric_meta(
        metric_namespace="upload",
        metric_name="data_access",
        metric_meta=MetricMeta(name="upload", metric_type="UPLOAD"),
    )
def read_component_model(self):
    pipelined_model = PipelinedModel(
        gen_party_model_id(self.model_id, self.tracker.role, self.tracker.party_id),
        self.model_version)

    component_model = pipelined_model._read_component_model(self.component_name, self.model_alias)
    if not component_model:
        raise ValueError('The component model is empty.')

    self.model_output = component_model
    self.tracker.set_metric_meta(
        'model_loader', f'{self.component_name}-{self.model_alias}',
        MetricMeta('component_model', 'component_model_info', {
            'model_id': self.model_id,
            'model_version': self.model_version,
            'component_name': self.component_name,
            'model_alias': self.model_alias,
        }))
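
# For orientation only: gen_party_model_id derives a party-scoped model id from the
# shared model_id plus the local role and party_id. The exact '#'-joined format below
# is an assumption based on common FATE conventions, not a guaranteed contract:
def _demo_gen_party_model_id(model_id, role, party_id):
    return '#'.join([role, str(party_id), model_id])

# e.g. _demo_gen_party_model_id('arbiter-10000#guest-9999#model', 'guest', 9999)
# -> 'guest#9999#arbiter-10000#guest-9999#model'
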
def save_metric_meta(self, metric_namespace: str, metric_name: str,
                     metric_meta: MetricMeta, job_level: bool = False):
    schedule_logger(self.job_id).info(
        f'save component {self.component_name} on {self.role} {self.party_id} '
        f'{metric_namespace} {metric_name} metric meta')
    self.metric_manager.insert_metrics_into_db(
        metric_namespace, metric_name, 0, metric_meta.to_dict().items(), job_level)
def get_metric_meta(self, metric_namespace: str, metric_name: str, job_level: bool = False):
    kv = dict()
    for k, v in self.metric_manager.read_metrics_from_db(metric_namespace, metric_name, 0, job_level):
        kv[k] = v
    return MetricMeta(name=kv.get('name'), metric_type=kv.get('metric_type'), extra_metas=kv)
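
# Sketch of the save/read round trip used by save_metric_meta/get_metric_meta above:
# the meta object is flattened to (key, value) rows for the database, then rebuilt
# from those rows. _DemoMetricMeta stands in for MetricMeta and the _rows list stands
# in for the metrics table; both are assumptions for illustration only.
class _DemoMetricMeta:
    def __init__(self, name, metric_type, extra_metas=None):
        self.name = name
        self.metric_type = metric_type
        self.extra_metas = extra_metas or {}

    def to_dict(self):
        return {'name': self.name, 'metric_type': self.metric_type, **self.extra_metas}


_rows = []

def _demo_save(meta):
    _rows.extend(meta.to_dict().items())  # flatten the meta dict into rows

def _demo_get():
    kv = dict(_rows)                      # rebuild the dict from the stored rows
    return _DemoMetricMeta(kv.get('name'), kv.get('metric_type'), extra_metas=kv)
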
def _run(self, cpn_input: ComponentInputProtocol):
    self.parameters = cpn_input.parameters
    LOGGER.info(self.parameters)
    for k, v in self.parameters.items():
        if hasattr(self, k):
            setattr(self, k, v)

    tracker = Tracker(job_id=self.job_id,
                      role=self.tracker.role,
                      party_id=self.tracker.party_id,
                      component_name=self.component_name)
    LOGGER.info(f"query cache by cache key: {self.cache_key} cache name: {self.cache_name}")
    # TODO: use the tracker client instead of the tracker directly
    caches = tracker.query_output_cache(cache_key=self.cache_key, cache_name=self.cache_name)
    if not caches:
        raise Exception("cannot find this cache")
    if len(caches) > 1:
        raise Exception(f"found {len(caches)} caches, only one is supported, please check parameters")
    cache = caches[0]

    self.cache_output = cache
    tracker.job_id = self.tracker.job_id
    tracker.component_name = self.tracker.component_name

    # record the cache description (everything except the data itself) as metric meta
    metric_meta = cache.to_dict()
    metric_meta.pop("data")
    metric_meta["component_name"] = self.component_name
    self.tracker.set_metric_meta(metric_namespace="cache_loader",
                                 metric_name=cache.name,
                                 metric_meta=MetricMeta(name="cache", metric_type="cache_info",
                                                        extra_metas=metric_meta))
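
# The lookup above enforces an exactly-one-match contract: zero results and multiple
# results are both errors. A generic helper with the same semantics (hypothetical,
# not a FATE API) makes the contract reusable:
def _require_single(results, what="cache"):
    if not results:
        raise Exception(f"cannot find this {what}")
    if len(results) > 1:
        raise Exception(f"found {len(results)} {what}s, only one is supported, please check parameters")
    return results[0]
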
def _run(self, cpn_input: ComponentInputProtocol):
    self.parameters = cpn_input.parameters
    self.job_parameters = cpn_input.job_parameters
    output_storage_address = self.job_parameters.engines_address[EngineType.STORAGE]
    # only one input table is supported
    table_key = [key for key in self.parameters.keys()][0]

    input_table_namespace, input_table_name = self.get_input_table_info(
        parameters=self.parameters[table_key],
        role=self.tracker.role,
        party_id=self.tracker.party_id,
    )
    output_table_namespace, output_table_name = default_output_info(
        task_id=self.tracker.task_id,
        task_version=self.tracker.task_version,
        output_type="data",
    )
    input_table_meta, output_table_address, output_table_engine = self.convert_check(
        input_name=input_table_name,
        input_namespace=input_table_namespace,
        output_name=output_table_name,
        output_namespace=output_table_namespace,
        computing_engine=self.job_parameters.computing_engine,
        output_storage_address=output_storage_address,
    )

    sess = Session.get_global()
    input_table = sess.get_table(name=input_table_meta.get_name(),
                                 namespace=input_table_meta.get_namespace())
    # update the real count in the meta info
    input_table.count()

    # table replication is required when the storage engines differ
    if input_table_meta.get_engine() != output_table_engine:
        LOGGER.info(
            f"the {input_table_meta.get_engine()} engine input table needs to be converted "
            f"to the {output_table_engine} engine to support computing engine "
            f"{self.job_parameters.computing_engine}")
    else:
        LOGGER.info(f"the {input_table_meta.get_engine()} input table only needs a format transformation")

    LOGGER.info("reader creates storage session")
    output_table_session = sess.storage(storage_engine=output_table_engine)
    output_table = output_table_session.create_table(
        address=output_table_address,
        name=output_table_name,
        namespace=output_table_namespace,
        partitions=input_table_meta.partitions,
        origin=StorageTableOrigin.READER)
    self.save_table(src_table=input_table, dest_table=output_table)

    # update the real count in the meta info
    output_table_meta = StorageTableMeta(name=output_table.name, namespace=output_table.namespace)
    # TODO: maybe set the output data, and let the executor support passing it persistently
    self.tracker.log_output_data_info(
        data_name=cpn_input.flow_feeded_parameters.get("output_data_name")[0]
        if cpn_input.flow_feeded_parameters.get("output_data_name") else table_key,
        table_namespace=output_table_meta.get_namespace(),
        table_name=output_table_meta.get_name(),
    )
    DataTableTracker.create_table_tracker(
        output_table_meta.get_name(),
        output_table_meta.get_namespace(),
        entity_info={
            "have_parent": True,
            "parent_table_namespace": input_table_namespace,
            "parent_table_name": input_table_name,
            "job_id": self.tracker.job_id,
        },
    )

    headers_str = output_table_meta.get_schema().get("header")
    table_info = {}
    if output_table_meta.get_schema() and headers_str:
        if isinstance(headers_str, str):
            data_list = [headers_str.split(",")]
            is_display = True
        else:
            data_list = [headers_str]
            is_display = False
        if is_display:
            for data in output_table_meta.get_part_of_data():
                data_list.append(data[1].split(","))
            # transpose so each row holds one column's header followed by its sample values
            for column in np.array(data_list).transpose():
                table_info[column[0]] = ",".join(list(set(column[1:]))[:5])

    data_info = {
        "table_name": input_table_name,
        "namespace": input_table_namespace,
        "table_info": table_info,
        "partitions": output_table_meta.get_partitions(),
        "storage_engine": output_table_meta.get_engine(),
    }
    if input_table_meta.get_engine() in [StorageEngine.PATH]:
        data_info["file_count"] = output_table_meta.get_count()
        data_info["file_path"] = input_table_meta.get_address().path
    else:
        data_info["count"] = output_table_meta.get_count()

    self.tracker.set_metric_meta(
        metric_namespace="reader_namespace",
        metric_name="reader_name",
        metric_meta=MetricMeta(name="reader", metric_type="data_info", extra_metas=data_info),
    )
def set_metric_meta_common(self, metric_namespace: str, metric_name: str,
                           metric_meta: typing.Union[MetricMeta, dict], job_level=False):
    LOGGER.info("Request save job {} task {} {} on {} {} metric {} {} meta".format(
        self.job_id, self.task_id, self.task_version, self.role, self.party_id,
        metric_namespace, metric_name))
    request_body = {
        'metric_namespace': metric_namespace,
        'metric_name': metric_name,
        'metric_meta': serialize_b64(
            metric_meta if isinstance(metric_meta, MetricMeta) else MetricMeta.from_dict(metric_meta),
            to_str=True),
        'job_level': job_level,
    }
    response = api_utils.local_api(
        job_id=self.job_id,
        method='POST',
        endpoint='/tracker/{}/{}/{}/{}/{}/{}/metric_meta/save'.format(
            self.job_id, self.component_name, self.task_id, self.task_version,
            self.role, self.party_id),
        json_body=request_body)
    if response['retcode'] != RetCode.SUCCESS:
        raise Exception(
            f"log metric(namespace: {metric_namespace}, name: {metric_name}) meta error, "
            f"response code: {response['retcode']}, msg: {response['retmsg']}")
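
# serialize_b64 above packs a Python object into a JSON-safe string so it can travel
# in the POST body. A plausible minimal implementation (an assumption; the real FATE
# helper may differ) is pickle followed by base64:
import base64
import pickle

def _demo_serialize_b64(obj, to_str=True):
    raw = base64.b64encode(pickle.dumps(obj))
    return raw.decode('utf-8') if to_str else raw

def _demo_deserialize_b64(data):
    return pickle.loads(base64.b64decode(data))

# round trip: _demo_deserialize_b64(_demo_serialize_b64({'name': 'reader'})) == {'name': 'reader'}
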