def upload_objects(self, objects):
    """
    Perform the uploading. Each element in the objects list should be
    a dict that looks like:
    {
        "obj": <path to a local file or a file-like object>,
        "dest": <target file name under "sub_path">,
        "length": <for a file-like object, length in bytes of its content>
    }

    :param objects: a list of objects to be uploaded.
    :return: the complete "s3://" style URI of the sub-path under which
             all the uploaded objects are located
    """
    client = self.client
    bucket = self.bucket
    found = client.bucket_exists(bucket)
    if not found:
        client.make_bucket(bucket)
    for obj in objects:
        dest = self.sub_path + "/" + obj['dest']
        file = obj['obj']
        stat_logger.debug("Uploading {} to {}".format(file, dest))
        if hasattr(file, 'read'):
            length = obj['length']
            client.put_object(bucket, dest, file, length)
        else:
            client.fput_object(bucket, dest, file)
    model_path = f's3://{bucket}/{self.sub_path}'
    stat_logger.info("Uploaded model objects into path: {}".format(model_path))
    return model_path
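# Usage sketch for upload_objects above; a minimal illustration, not the
# project's own example. "storage" stands for an instance of the class that
# owns upload_objects; the paths and file names are hypothetical.
def _example_upload_objects(storage):
    import io
    meta_fp = io.BytesIO(b'{"version": 1}')  # file-like objects must carry "length"
    return storage.upload_objects([
        {"obj": "/tmp/model.pb", "dest": "model.pb"},
        {"obj": meta_fp, "dest": "meta.json", "length": meta_fp.getbuffer().nbytes},
    ])  # returns "s3://<bucket>/<sub_path>"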
def put_event(self, event):
    try:
        self.queue.put(event)
        stat_logger.info('put event into in-process queue successfully: {}'.format(event))
    except Exception as e:
        stat_logger.exception(e)
        stat_logger.error('put event into in-process queue failed')
def parse_proto_object(buffer_name, serialized_string, buffer_class=None):
    try:
        if buffer_class is None:
            buffer_class = get_proto_buffer_class(buffer_name)
        buffer_object = buffer_class()
    except Exception as e:
        stat_logger.exception('Can not restore proto buffer object')
        raise e
    buffer_name = type(buffer_object).__name__
    try:
        buffer_object.ParseFromString(serialized_string)
    except Exception as e1:
        stat_logger.exception(e1)
        try:
            # fallback: the payload may be a DefaultEmptyFillMessage placeholder,
            # in which case the target object is parsed with default values
            DefaultEmptyFillMessage().ParseFromString(serialized_string)
            buffer_object.ParseFromString(bytes())
        except Exception as e2:
            stat_logger.exception(e2)
            raise e1
        else:
            stat_logger.info(f'parsed {buffer_name} proto object with default values')
    else:
        stat_logger.info(f'parsed {buffer_name} proto object normally')
    return buffer_object
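# Usage sketch for parse_proto_object above. A serialized
# DefaultEmptyFillMessage placeholder is handled by the fallback branch when
# it does not parse into the target type. 'DataIOMeta' is a hypothetical
# buffer name; substitute any name known to get_proto_buffer_class.
def _example_parse_proto_object():
    fill_message = DefaultEmptyFillMessage()
    fill_message.flag = 'set'
    return parse_proto_object('DataIOMeta', fill_message.SerializeToString())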
def remote_api(job_id, method, endpoint, src_party_id, dest_party_id, json_body,
               overall_timeout=DEFAULT_GRPC_OVERALL_TIMEOUT):
    _packet = wrap_grpc_packet(json_body, method, endpoint, src_party_id, dest_party_id,
                               job_id, overall_timeout=overall_timeout)
    try:
        channel, stub = get_proxy_data_channel()
        # stat_logger.info("grpc api request: {}".format(_packet))
        _return = stub.unaryCall(_packet)
        stat_logger.info("grpc api response: {}".format(_return))
        channel.close()
        json_body = json.loads(_return.body.value)
        return json_body
    except Exception as e:
        # grpc.RpcError is a subclass of Exception, so one handler covers both
        raise Exception('rpc request error: {}'.format(e))
def save(self, model_buffers: Dict[str, Tuple[str, bytes, dict]]):
    if not model_buffers:
        raise ValueError('model_buffers is empty.')
    self.create_time = datetime.utcnow()
    data = {
        'step_index': self.step_index,
        'step_name': self.step_name,
        'create_time': self.create_time.isoformat(),
        'models': {},
    }
    model_data = {}
    for model_name, (pb_name, serialized_string, json_format_dict) in model_buffers.items():
        model_data[model_name] = (serialized_string, json_format_dict)
        data['models'][model_name] = {
            'sha1': hashlib.sha1(serialized_string).hexdigest(),
            'buffer_name': pb_name,
        }
    with self.lock:
        for model_name, model in data['models'].items():
            serialized_string, json_format_dict = model_data[model_name]
            (self.directory / f'{model_name}.pb').write_bytes(serialized_string)
            (self.directory / f'{model_name}.json').write_text(
                json_dumps(json_format_dict), 'utf8')
        self.database.write_text(
            yaml.dump(data, Dumper=yaml.RoundTripDumper), 'utf8')
    stat_logger.info(f'Checkpoint saved. path: {self.directory}')
    return self.directory
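# Usage sketch for save above; "checkpoint" stands for an instance of the
# checkpoint class, and the buffer name and contents are toy values.
def _example_save_checkpoint(checkpoint):
    model_buffers = {
        # model_name: (proto buffer class name, serialized bytes, JSON-format dict)
        'HeteroLRModelParam': ('LRModelParam', b'\x08\x01', {'iters': 1}),
    }
    return checkpoint.save(model_buffers)  # returns the checkpoint directory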
def mediation_queue_put_events(queue):
    n = queue.qsize(status=5)
    stat_logger.info('start check mediation queue, total num {}'.format(n))
    for i in range(n):
        event = queue.get_event(status=5)
        try:
            TaskScheduler.cancel_ready(event['job_id'], event['initiator_role'],
                                       event['initiator_party_id'])
            is_failed = queue.put_event(event, job_id=event['job_id'], status=1)
            schedule_logger(event['job_id']).info(
                'job into queue_1 status is {}'.format(
                    'success' if not is_failed else 'failed'))
            if is_failed:
                schedule_logger(event['job_id']).info('start to cancel job')
                TaskScheduler.stop(job_id=event['job_id'], end_status=JobStatus.CANCELED)
        except Exception as e:
            schedule_logger(event['job_id']).error(e)
            try:
                schedule_logger(event['job_id']).info('start to cancel job')
                TaskScheduler.stop(job_id=event['job_id'], end_status=JobStatus.CANCELED)
            except Exception:
                schedule_logger(event['job_id']).info('cancel job failed')
def bind_model_service(config_data):
    service_id = config_data.get('service_id')
    initiator_role = config_data['initiator']['role']
    initiator_party_id = config_data['initiator']['party_id']
    model_id = config_data['job_parameters']['model_id']
    model_version = config_data['job_parameters']['model_version']
    if not config_data.get('servings'):
        return 100, 'Please configure servings address'
    for serving in config_data.get('servings'):
        with grpc.insecure_channel(serving) as channel:
            stub = model_service_pb2_grpc.ModelServiceStub(channel)
            publish_model_request = model_service_pb2.PublishRequest()
            publish_model_request.serviceId = service_id
            for role_name, role_party in config_data.get("role").items():
                publish_model_request.role[role_name].partyId.extend(role_party)
            publish_model_request.model[initiator_role].roleModelInfo[initiator_party_id].tableName = model_version
            publish_model_request.model[initiator_role].roleModelInfo[initiator_party_id].namespace = \
                model_utils.gen_party_model_id(model_id, initiator_role, initiator_party_id)
            publish_model_request.local.role = initiator_role
            publish_model_request.local.partyId = initiator_party_id
            stat_logger.info(publish_model_request)
            response = stub.publishBind(publish_model_request)
            stat_logger.info(response)
            if response.statusCode != 0:
                return response.statusCode, response.message
    return 0, None
def del_event(self, event):
    try:
        ret = self.dell(event)
        stat_logger.info('delete event from in-process queue {}: {}'.format(
            'successfully' if ret else 'failed', event))
    except Exception as e:
        stat_logger.info('delete event from queue failed: {}'.format(str(e)))
        raise Exception('{} not in ListQueue'.format(event))
def save_component_model(self, component_name, component_module_name,
                         model_alias, model_buffers):
    model_proto_index = {}
    component_model_storage_path = os.path.join(self.variables_data_path,
                                                component_name, model_alias)
    os.makedirs(component_model_storage_path, exist_ok=True)
    for model_name, buffer_object in model_buffers.items():
        storage_path = os.path.join(component_model_storage_path, model_name)
        buffer_object_serialized_string = buffer_object.SerializeToString()
        if not buffer_object_serialized_string:
            # an empty serialization is replaced with a placeholder message
            # so that an actual file is always written
            fill_message = default_empty_fill_pb2.DefaultEmptyFillMessage()
            fill_message.flag = 'set'
            buffer_object_serialized_string = fill_message.SerializeToString()
        with open(storage_path, "wb") as fw:
            fw.write(buffer_object_serialized_string)
        # index of model name and proto buffer class name
        model_proto_index[model_name] = type(buffer_object).__name__
        stat_logger.info("Save {} {} {} buffer".format(
            component_name, model_alias, model_name))
    self.update_component_meta(component_name=component_name,
                               component_module_name=component_module_name,
                               model_alias=model_alias,
                               model_proto_index=model_proto_index)
    stat_logger.info("Save {} {} successfully".format(component_name, model_alias))
def publish_online(config_data):
    initiator_role = config_data['initiator']['role']
    initiator_party_id = config_data['initiator']['party_id']
    model_id = config_data['job_parameters']['model_id']
    model_version = config_data['job_parameters']['model_version']
    success = True
    for serving in config_data.get('servings'):
        with grpc.insecure_channel(serving) as channel:
            stub = model_service_pb2_grpc.ModelServiceStub(channel)
            publish_model_request = model_service_pb2.PublishRequest()
            for role_name, role_party in config_data.get("role").items():
                publish_model_request.role[role_name].partyId.extend(role_party)
            publish_model_request.model[initiator_role].roleModelInfo[initiator_party_id].tableName = model_version
            publish_model_request.model[initiator_role].roleModelInfo[initiator_party_id].namespace = \
                model_utils.gen_party_model_id(model_id, initiator_role, initiator_party_id)
            publish_model_request.local.role = initiator_role
            publish_model_request.local.partyId = initiator_party_id
            stat_logger.info(publish_model_request)
            response = stub.publishOnline(publish_model_request)
            stat_logger.info(response)
            if response.statusCode != 0:
                success = False
    return success
def authentication_privilege(cls, src_party_id, src_role, request_path,
                             party_id_index, role_index, command, component_index=None):
    if not src_party_id:
        src_party_id = 0
    src_party_id = str(src_party_id)
    if src_party_id == PrivilegeAuth.get_dest_party_id(request_path, party_id_index):
        return
    stat_logger.info("party {} role {} start authentication".format(src_party_id, src_role))
    privilege_dic = PrivilegeAuth.get_authentication_items(request_path, role_index, command)
    for privilege_type, value in privilege_dic.items():
        if value and privilege_type == 'privilege_component':
            continue
        if value in PrivilegeAuth.command_whitelist:
            continue
        if value:
            PrivilegeAuth.authentication_privilege_do(value, src_party_id, src_role, privilege_type)
    stat_logger.info('party {} role {} authenticated successfully'.format(src_party_id, src_role))
    return True
def run_subprocess(config_dir, process_cmd, log_dir=None):
    stat_logger.info('Starting process command: {}'.format(process_cmd))
    stat_logger.info(' '.join(process_cmd))
    os.makedirs(config_dir, exist_ok=True)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    std_log = open(os.path.join(log_dir if log_dir else config_dir, 'std.log'), 'w')
    pid_path = os.path.join(config_dir, 'pid')
    if os.name == 'nt':
        # hide the console window on Windows
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
    else:
        startupinfo = None
    p = subprocess.Popen(process_cmd,
                         stdout=std_log,
                         stderr=std_log,
                         startupinfo=startupinfo)
    with open(pid_path, 'w') as f:
        f.truncate()
        f.write(str(p.pid) + "\n")
        f.flush()
    return p
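# Usage sketch for run_subprocess above; the directories and command are
# hypothetical, any argv-style list works.
def _example_run_subprocess():
    p = run_subprocess(config_dir='/tmp/fate_flow_demo',
                       process_cmd=['python', '-c', 'print("hello")'],
                       log_dir='/tmp/fate_flow_demo/logs')
    p.wait()  # stdout and stderr are captured in /tmp/fate_flow_demo/logs/std.log
    return p.returncode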
def unaryCall(self, _request, context):
    packet = _request
    header = packet.header
    _suffix = packet.body.key
    param_bytes = packet.body.value
    param = bytes.decode(param_bytes)
    job_id = header.task.taskId
    src = header.src
    dst = header.dst
    method = header.operator
    param_dict = json_loads(param)
    param_dict['src_party_id'] = str(src.partyId)
    source_routing_header = []
    for key, value in context.invocation_metadata():
        source_routing_header.append((key, value))
    stat_logger.info(f"grpc request routing header: {source_routing_header}")
    param = bytes.decode(bytes(json_dumps(param_dict), 'utf-8'))
    action = getattr(requests, method.lower(), None)
    if action:
        print(_suffix)
        # resp = action(url=get_url(_suffix), data=param, headers=HEADERS)
    else:
        pass
    # the real HTTP call is stubbed out here: respond with a fixed payload
    # after a 60-second delay
    # resp_json = resp.json()
    resp_json = {"status": "test"}
    import time
    print("sleep")
    time.sleep(60)
    return wrap_grpc_packet(resp_json, method, _suffix, dst.partyId, src.partyId, job_id)
def create_component_model(self, component_name, component_module_name, model_alias,
                           model_buffers: typing.Dict[str, typing.Tuple[str, bytes, dict]],
                           user_specified_run_parameters: dict = None):
    model_proto_index = {}
    component_model = {"buffer": {}}
    component_model_storage_path = os.path.join(self.variables_data_path,
                                                component_name, model_alias)
    for model_name, (proto_index, object_serialized, object_json) in model_buffers.items():
        storage_path = os.path.join(component_model_storage_path, model_name)
        component_model["buffer"][storage_path.replace(get_fate_flow_directory(), "")] = \
            (base64.b64encode(object_serialized).decode(), object_json)
        # index of model name and proto buffer class name
        model_proto_index[model_name] = proto_index
        stat_logger.info("save {} {} {} buffer".format(
            component_name, model_alias, model_name))
    component_model["component_name"] = component_name
    component_model["component_module_name"] = component_module_name
    component_model["model_alias"] = model_alias
    component_model["model_proto_index"] = model_proto_index
    component_model["run_parameters"] = user_specified_run_parameters
    return component_model
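# Usage sketch for create_component_model above; "model" stands for the
# pipelined-model instance, and the component and buffer names are
# hypothetical placeholders matching the documented tuple shape.
def _example_create_component_model(model):
    buffers = {
        # model_name: (proto buffer class name, serialized bytes, JSON-format dict)
        'HeteroLRParam': ('LRModelParam', b'\x08\x01', {'iters': 1}),
    }
    return model.create_component_model(component_name='hetero_lr_0',
                                        component_module_name='HeteroLR',
                                        model_alias='model',
                                        model_buffers=buffers)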
def unaryCall(self, _request, context):
    packet = _request
    header = packet.header
    _suffix = packet.body.key
    param_bytes = packet.body.value
    param = bytes.decode(param_bytes)
    job_id = header.task.taskId
    src = header.src
    dst = header.dst
    method = header.operator
    param_dict = json.loads(param)
    param_dict['src_party_id'] = str(src.partyId)
    param = bytes.decode(bytes(json.dumps(param_dict), 'utf-8'))
    action = getattr(requests, method.lower(), None)
    stat_logger.info('rpc receive: {}'.format(packet))
    if action:
        stat_logger.info("rpc receive: {} {}".format(get_url(_suffix), param))
        resp = action(url=get_url(_suffix), data=param, headers=HEADERS)
    else:
        # the original fell through with "pass" and then referenced an
        # undefined "resp"; fail fast on unsupported HTTP methods instead
        raise NotImplementedError('unsupported HTTP method: {}'.format(method))
    resp_json = resp.json()
    return wrap_grpc_packet(resp_json, method, _suffix, dst.partyId, src.partyId, job_id)
def do_load_model():
    request_data = request.json
    request_data['servings'] = RuntimeConfig.SERVICE_DB.get_urls('servings')
    role = request_data['local']['role']
    party_id = request_data['local']['party_id']
    model_id = request_data['job_parameters']['model_id']
    model_version = request_data['job_parameters']['model_version']
    party_model_id = model_utils.gen_party_model_id(model_id, role, party_id)

    if get_base_config('enable_model_store', False):
        pipeline_model = pipelined_model.PipelinedModel(party_model_id, model_version)
        component_parameters = {
            'model_id': party_model_id,
            'model_version': model_version,
            'store_address': ServiceRegistry.MODEL_STORE_ADDRESS,
        }
        model_storage = get_model_storage(component_parameters)
        if pipeline_model.exists() and not model_storage.exists(**component_parameters):
            stat_logger.info(f'Uploading {pipeline_model.model_path} to model storage.')
            model_storage.store(**component_parameters)
        elif not pipeline_model.exists() and model_storage.exists(**component_parameters):
            stat_logger.info(f'Downloading {pipeline_model.model_path} from model storage.')
            model_storage.restore(**component_parameters)

    if not model_utils.check_if_deployed(role, party_id, model_id, model_version):
        return get_json_result(
            retcode=100,
            retmsg="Only deployed models can be loaded. "
                   "Please deploy the model before loading.")

    retcode, retmsg = publish_model.load_model(request_data)
    try:
        if not retcode:
            with DB.connection_context():
                model = MLModel.get_or_none(
                    MLModel.f_role == request_data["local"]["role"],
                    MLModel.f_party_id == request_data["local"]["party_id"],
                    MLModel.f_model_id == request_data["job_parameters"]["model_id"],
                    MLModel.f_model_version == request_data["job_parameters"]["model_version"])
                if model:
                    model.f_loaded_times += 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    operation_record(request_data, "load", "success" if not retcode else "failed")
    return get_json_result(retcode=retcode, retmsg=retmsg)
def search_component():
    from fate_flow.db.db_models import ComponentInfo
    component_list = []
    for component in ComponentInfo.select():
        component_list.append(component.f_component_name.lower())
    component_list = list(set(component_list) - {'upload', 'download'})
    PrivilegeAuth.ALL_PERMISSION['privilege_component'].extend(component_list)
    stat_logger.info('search component list {}'.format(component_list))
def clean_task(self, roles, party_ids):
    stat_logger.info('clean table by namespace {}'.format(self.task_id))
    session.clean_tables(namespace=self.task_id, regex_string='*')
    for role in roles.split(','):
        for party_id in party_ids.split(','):
            session.clean_tables(namespace=self.task_id + '_' + role + '_' + party_id,
                                 regex_string='*')
def packaging_model(self):
    if not os.path.exists(self.model_path):
        raise Exception("Cannot find {} {} model local cache".format(
            self.model_id, self.model_version))
    archive_file_path = shutil.make_archive(base_name=self.archive_model_base_path(),
                                            format=self.default_archive_format,
                                            root_dir=self.model_path)
    stat_logger.info("Made model {} {} archive at {} successfully".format(
        self.model_id, self.model_version, archive_file_path))
    return archive_file_path
def __init__(self):
    super(MysqlQueue, self).__init__()
    self.ready = True
    self.mutex = threading.Lock()
    self.not_empty = threading.Condition(self.mutex)
    self.not_full = threading.Condition(self.mutex)
    self.maxsize = 0
    stat_logger.info('init queue')
def put_event(self, event):
    try:
        conn = self.get_conn()
        ret = conn.lpush(self.queue_name, json.dumps(event))
        stat_logger.info('put event into redis queue {}: {}'.format(
            'successfully' if ret else 'failed', event))
    except Exception as e:
        stat_logger.exception(e)
        stat_logger.error('put event into redis queue failed')
def del_event(self, event):
    ret = self.dell(event)
    if not ret:
        raise Exception('delete event failed, {} not in MysqlQueue'.format(event))
    stat_logger.info('delete event from queue successfully: {}'.format(event))
def get_event(self):
    try:
        event = self.queue.get(block=True)
        stat_logger.info('get event from in-process queue successfully: {}'.format(event))
        return event
    except Exception as e:
        stat_logger.exception(e)
        stat_logger.error('get event from in-process queue failed')
        return None
def drop_metric_data_mode(model):
    try:
        drop_sql = 'drop table t_tracking_metric_{}'.format(model)
        DB.execute_sql(drop_sql)
        stat_logger.info(drop_sql)
        return drop_sql
    except Exception as e:
        stat_logger.exception(e)
        raise e
def query_model_info_from_file(model_id=None, model_version=None, role=None, party_id=None,
                               query_filters=None, to_dict=False, **kwargs):
    res = {} if to_dict else []
    model_dir = os.path.join(get_fate_flow_directory(), 'model_local_cache')
    glob_dir = (f"{model_dir}{os.sep}"
                f"{role if role else '*'}#{party_id if party_id else '*'}#{model_id if model_id else '*'}"
                f"{os.sep}{model_version if model_version else '*'}")
    stat_logger.info(f'glob model dir: {glob_dir}')
    model_fp_list = glob.glob(glob_dir)
    if model_fp_list:
        for fp in model_fp_list:
            pipeline_model = PipelinedModel(model_id=fp.split(os.path.sep)[-2],
                                            model_version=fp.split(os.path.sep)[-1])
            model_info = gather_model_info_data(pipeline_model, query_filters=query_filters)
            if model_info:
                # the directory name encodes role#party_id#model_id
                # (use os.path.sep, not '/', for portability)
                _role = fp.split(os.path.sep)[-2].split('#')[0]
                _party_id = fp.split(os.path.sep)[-2].split('#')[1]
                model_info["f_role"] = _role
                model_info["f_party_id"] = _party_id
                if isinstance(res, dict):
                    res[fp] = model_info
                else:
                    res.append(model_info)
                if kwargs.get('save'):
                    try:
                        insert_info = gather_model_info_data(pipeline_model).copy()
                        insert_info['role'] = _role
                        insert_info['party_id'] = _party_id
                        insert_info['job_id'] = insert_info.get('f_model_version')
                        insert_info['size'] = pipeline_model.calculate_model_file_size()
                        if compare_version(insert_info['f_fate_version'], '1.5.1') == 'lt':
                            insert_info['roles'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('role', {})
                            insert_info['initiator_role'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
                            insert_info['initiator_party_id'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
                        save_model_info(insert_info)
                    except Exception as e:
                        stat_logger.exception(e)
    if res:
        return 0, 'Query model info from local model success.', res
    return 100, 'Query model info failed, cannot find model from local model files.', res
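# Usage sketch for query_model_info_from_file above; the model id, version,
# role and party id are hypothetical placeholders.
def _example_query_model_info():
    retcode, retmsg, infos = query_model_info_from_file(
        model_id='guest-9999#host-10000#model',
        model_version='202101011200000000001',
        role='guest',
        party_id='9999')
    return infos if retcode == 0 else []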
def clean_server_used_session():
    used_session_id = None
    try:
        used_session_id = session.get_session_id()
        session.stop()
    except Exception:
        pass
    session.exit()
    stat_logger.info("clean session {} for fate flow server done".format(used_session_id))
def destroy(self):
    """Delete the InferenceService."""
    if self.status() is not None:
        self.kfserving_client.delete(self.service_id, namespace=self.namespace)
        stat_logger.info("InferenceService {} is deleted".format(self.service_id))
def __init__(self):
    super(ListQueue, self).__init__()
    self.queue = []
    self.ready = True
    self.mutex = threading.Lock()
    self.not_empty = threading.Condition(self.mutex)
    self.not_full = threading.Condition(self.mutex)
    self.maxsize = 0
    self.unfinished_tasks = 0
    stat_logger.info('init in-process queue')
def del_event(self, event):
    try:
        conn = self.get_conn()
        ret = conn.lrem(self.queue_name, 1, json.dumps(event))
        stat_logger.info('delete event from redis queue {}: {}'.format(
            'successfully' if ret else 'failed', event))
        if not ret:
            raise Exception('job not in redis queue')
    except Exception as e:
        stat_logger.info('delete event from redis queue failed: {}'.format(str(e)))
        raise Exception('delete event from redis queue failed')
def authentication_privilege_do(cls, value, src_party_id, src_role, privilege_type):
    if value not in PrivilegeAuth.privilege_cache.get(
            src_party_id, {}).get(src_role, {}).get(privilege_type, []):
        if value not in PrivilegeAuth.get_permission_config(
                src_party_id, src_role).get(privilege_type, []):
            stat_logger.info('{} {} not authorized'.format(
                privilege_type.split('_')[1], value))
            raise Exception('{} {} not authorized'.format(
                privilege_type.split('_')[1], value))