Beispiel #1
0
 def finish_job(job_id, job_runtime_conf):
     """Notify every party of a finished job: persist its pipeline model, then clean up."""
     parameters = job_runtime_conf['job_parameters']
     initiator = job_runtime_conf['initiator']
     encoded_model_id = base64_encode(parameters['model_id'])
     encoded_model_version = base64_encode(parameters['model_version'])
     for role, party_ids in job_runtime_conf['role'].items():
         for party_id in party_ids:
             # ask the party to save the pipeline model
             federated_api(job_id=job_id,
                           method='POST',
                           endpoint='/{}/job/{}/{}/{}/{}/{}/save/pipeline'.format(
                               API_VERSION, job_id, role, party_id,
                               encoded_model_id, encoded_model_version),
                           src_party_id=initiator['party_id'],
                           dest_party_id=party_id,
                           json_body={},
                           work_mode=parameters['work_mode'])
             # ask the party to clean up the job's resources
             federated_api(job_id=job_id,
                           method='POST',
                           endpoint='/{}/job/{}/{}/{}/clean'.format(
                               API_VERSION, job_id, role, party_id),
                           src_party_id=initiator['party_id'],
                           dest_party_id=party_id,
                           json_body={},
                           work_mode=parameters['work_mode'])
Beispiel #2
0
 def sync_job_status(job_id,
                     roles,
                     work_mode,
                     initiator_party_id,
                     initiator_role,
                     job_info,
                     sync_failed=False):
     """Push the job status in ``job_info`` to every party of every role.

     :param job_id: id of the job being synchronized
     :param roles: mapping of role name -> list of party ids
     :param work_mode: federation work mode forwarded to federated_api
     :param initiator_party_id: party id of the job initiator (request source)
     :param initiator_role: role of the job initiator (request source)
     :param job_info: mutable dict of job fields; ``f_role``/``f_party_id``
         are rewritten in place for each destination party
     :param sync_failed: when True, per-party delivery errors are ignored
         so a failed job can still be reported best-effort; otherwise the
         first delivery error propagates to the caller
     """
     for role, partys in roles.items():
         job_info['f_role'] = role
         for party_id in partys:
             job_info['f_party_id'] = party_id
             try:
                 federated_api(
                     job_id=job_id,
                     method='POST',
                     endpoint='/{}/schedule/{}/{}/{}/status'.format(
                         API_VERSION, job_id, role, party_id),
                     src_party_id=initiator_party_id,
                     dest_party_id=party_id,
                     src_role=initiator_role,
                     json_body=job_info,
                     work_mode=work_mode)
             except Exception:
                 # bare `raise` preserves the original exception type and
                 # traceback; the old `raise Exception(e)` discarded both
                 if not sync_failed:
                     raise
Beispiel #3
0
 def run_do(self):
     """Detect tasks whose worker process has died and stop their jobs.

     Periodic detector body: queries tasks recorded as 'running' on this
     host, checks whether each recorded pid is still alive, and for every
     job owning a dead task asks the job initiator to kill the whole job,
     then finalizes the job locally.
     """
     try:
         running_tasks = job_utils.query_task(status='running',
                                              run_ip=get_lan_ip())
         stop_job_ids = set()
         # detect_logger.info('start to detect running job..')
         for task in running_tasks:
             try:
                 # True if the recorded worker pid still exists on this host
                 process_exist = job_utils.check_job_process(
                     int(task.f_run_pid))
                 if not process_exist:
                     detect_logger.info(
                         'job {} component {} on {} {} task {} {} process does not exist'
                         .format(task.f_job_id, task.f_component_name,
                                 task.f_role, task.f_party_id,
                                 task.f_task_id, task.f_run_pid))
                     stop_job_ids.add(task.f_job_id)
             except Exception as e:
                 # one unreadable task record must not stop detection of the rest
                 detect_logger.exception(e)
         if stop_job_ids:
             schedule_logger().info(
                 'start to stop jobs: {}'.format(stop_job_ids))
         for job_id in stop_job_ids:
             jobs = job_utils.query_job(job_id=job_id)
             if jobs:
                 initiator_party_id = jobs[0].f_initiator_party_id
                 job_work_mode = jobs[0].f_work_mode
                 if len(jobs) > 1:
                     # i am initiator
                     my_party_id = initiator_party_id
                 else:
                     my_party_id = jobs[0].f_party_id
                     initiator_party_id = jobs[0].f_initiator_party_id
                 # ask the initiator to broadcast a kill for the whole job
                 api_utils.federated_api(
                     job_id=job_id,
                     method='POST',
                     endpoint='/{}/job/stop'.format(API_VERSION),
                     src_party_id=my_party_id,
                     dest_party_id=initiator_party_id,
                     src_role=None,
                     json_body={
                         'job_id': job_id,
                         'operate': 'kill'
                     },
                     work_mode=job_work_mode)
                 # finalize (clean up) locally even though the job was stopped
                 TaskScheduler.finish_job(job_id=job_id,
                                          job_runtime_conf=json_loads(
                                              jobs[0].f_runtime_conf),
                                          stop=True)
     except Exception as e:
         detect_logger.exception(e)
     finally:
         detect_logger.info('finish detect running job')
Beispiel #4
0
 def sync_job_status(job_id, roles, work_mode, initiator_party_id,
                     job_info):
     """Broadcast the job status in ``job_info`` to every party of every role."""
     for role, party_ids in roles.items():
         job_info['f_role'] = role
         for party_id in party_ids:
             job_info['f_party_id'] = party_id
             status_endpoint = '/{}/job/{}/{}/{}/status'.format(
                 API_VERSION, job_id, role, party_id)
             federated_api(job_id=job_id,
                           method='POST',
                           endpoint=status_endpoint,
                           src_party_id=initiator_party_id,
                           dest_party_id=party_id,
                           json_body=job_info,
                           work_mode=work_mode)
Beispiel #5
0
 def sync_task_status(job_id, component_name, task_id, role, party_id,
                      initiator_party_id, task_info):
     """Send ``task_info`` to the executing party and, if different, to the initiator."""
     endpoint = '/{}/schedule/{}/{}/{}/{}/{}/status'.format(
         API_VERSION, job_id, component_name, task_id, role, party_id)
     for dest_party_id in {party_id, initiator_party_id}:
         remote_initiator = (party_id != initiator_party_id
                             and dest_party_id == initiator_party_id)
         if remote_initiator:
             # do not pass the process id to the initiator
             task_info['f_run_ip'] = ''
         federated_api(job_id=job_id,
                       method='POST',
                       endpoint=endpoint,
                       src_party_id=party_id,
                       dest_party_id=dest_party_id,
                       json_body=task_info,
                       work_mode=RuntimeConfig.WORK_MODE)
Beispiel #6
0
 def start_stop(job_id, operate=None):
     """Forward a stop command for ``job_id`` to the job initiator.

     :param operate: optional operation name attached to the request body
     :return: the initiator's response
     :raises Exception: when the job cannot be found at all
     """
     schedule_logger(job_id).info('get {} job {} command'.format(
         'stop', job_id))
     jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
     if not jobs:
         # this party is not the initiator; fall back to any record of the job
         jobs = job_utils.query_job(job_id=job_id)
     if not jobs:
         schedule_logger(job_id).info(
             'send {} job stop command failed, no find this job'.format(
                 job_id))
         raise Exception('can not found job: {}'.format(job_id))
     job_info = {'job_id': job_id}
     if operate:
         job_info['operate'] = operate
     first_job = jobs[0]
     # the stop request is addressed to the initiator, from the initiator
     return federated_api(
         job_id=job_id,
         method='POST',
         endpoint='/{}/job/stop/do'.format(API_VERSION),
         src_party_id=first_job.f_initiator_party_id,
         dest_party_id=first_job.f_initiator_party_id,
         src_role=None,
         json_body=job_info,
         work_mode=first_job.f_work_mode)
Beispiel #7
0
 def check_job(job_id,
               roles,
               work_mode,
               initiator_party_id,
               initiator_role,
               job_info,
               way='check'):
     """Ask every party of the job whether it may proceed.

     :param way: schedule sub-path to call on each party (default 'check')
     :return: False as soon as any party answers retcode 101 or returns a
         malformed response; True when every party agrees.
     """
     for role, partys in roles.items():
         job_info['f_role'] = role
         for party_id in partys:
             job_info['f_party_id'] = party_id
             response = federated_api(
                 job_id=job_id,
                 method='POST',
                 endpoint='/{}/schedule/{}/{}/{}/{}'.format(
                     API_VERSION, job_id, role, party_id, way),
                 src_party_id=initiator_party_id,
                 dest_party_id=party_id,
                 src_role=initiator_role,
                 json_body=job_info,
                 work_mode=work_mode)
             try:
                 if response['retcode'] == 101:
                     return False
             except (TypeError, KeyError):
                 # a response that is not a dict or lacks 'retcode' counts
                 # as a refusal; the old bare `except:` also swallowed
                 # SystemExit/KeyboardInterrupt
                 return False
     return True
Beispiel #8
0
def start_proxy(role):
    """Forward the incoming HTTP request to a remote party, or to proxy_api for marketplace."""
    request_config = request.json or request.form.to_dict()
    _job_id = f"{role}_forward"
    if role in ['marketplace']:
        return jsonify(proxy_api(role, _job_id, request_config))
    headers = request.headers
    # a request carrying both 'header' and 'body' is already enveloped
    if request_config.get('header') and request_config.get("body"):
        envelope_header = request_config.get('header')
        src_party_id = envelope_header.get('src_party_id')
        dest_party_id = envelope_header.get('dest_party_id')
        json_body = request_config
        if headers:
            json_body['header'].update(headers)
    else:
        # build the envelope ourselves from HTTP headers + raw body
        src_party_id = headers.get('src_party_id')
        dest_party_id = headers.get('dest_party_id')
        json_body = {"header": request.headers, "body": request_config}
    response = federated_api(job_id=_job_id,
                             method='POST',
                             endpoint='/forward/{}/do'.format(role),
                             src_party_id=src_party_id,
                             dest_party_id=dest_party_id,
                             src_role=None,
                             json_body=json_body,
                             federated_mode=FederatedMode.MULTIPLE)
    return jsonify(response)
Beispiel #9
0
 def report_task_to_initiator(cls, task: Task):
     """Report the state of ``task`` to the job initiator party.

     Only parties that are not themselves the initiator report. Delivery is
     retried up to DEFAULT_FEDERATED_COMMAND_TRYS times.

     :param task: task whose REPORT_TO_INITIATOR_FIELDS are reported
     :return: True on the first successful acknowledgement; False when this
         party is the initiator or every attempt failed
     """
     if task.f_role != task.f_initiator_role and task.f_party_id != task.f_initiator_party_id:
         exception = None
         for t in range(DEFAULT_FEDERATED_COMMAND_TRYS):
             try:
                 response = federated_api(
                     job_id=task.f_job_id,
                     method='POST',
                     endpoint='/initiator/{}/{}/{}/{}/{}/{}/report'.format(
                         task.f_job_id, task.f_component_name,
                         task.f_task_id, task.f_task_version, task.f_role,
                         task.f_party_id),
                     src_party_id=task.f_party_id,
                     dest_party_id=task.f_initiator_party_id,
                     src_role=task.f_role,
                     json_body=task.to_human_model_dict(
                         only_primary_with=cls.REPORT_TO_INITIATOR_FIELDS),
                     federated_mode=task.f_federated_mode)
             except Exception as e:
                 # remember the error and try again
                 exception = e
                 continue
             if response["retcode"] != RetCode.SUCCESS:
                 exception = Exception(response["retmsg"])
             else:
                 return True
         else:
             # for/else: the loop never breaks, so this runs when every
             # attempt either raised or returned a non-success retcode
             schedule_logger(job_id=task.f_job_id).error(
                 f"report task to initiator error: {exception}")
             return False
     else:
         return False
def load_model():
    """Dispatch a model-load request to every non-arbiter party and collect per-party return codes."""
    request_config = request.json
    _job_id = generate_job_id()
    initiator_party_id = request_config['initiator']['party_id']
    initiator_role = request_config['initiator']['role']
    publish_model.generate_publish_model_info(request_config)
    load_status = True
    load_status_msg = 'success'
    load_status_info = {}
    for role_name, role_partys in request_config.get("role").items():
        if role_name == 'arbiter':
            # the arbiter holds no model; nothing to load there
            continue
        party_status = load_status_info.setdefault(role_name, {})
        for _party_id in role_partys:
            request_config['local'] = {'role': role_name, 'party_id': _party_id}
            try:
                response = federated_api(job_id=_job_id,
                                         method='POST',
                                         endpoint='/{}/model/load/do'.format(API_VERSION),
                                         src_party_id=initiator_party_id,
                                         dest_party_id=_party_id,
                                         src_role=initiator_role,
                                         json_body=request_config,
                                         work_mode=request_config['job_parameters']['work_mode'])
                party_status[_party_id] = response['retcode']
            except Exception as e:
                stat_logger.exception(e)
                load_status = False
                load_status_msg = 'failed'
                party_status[_party_id] = 100
    return get_json_result(job_id=_job_id, retcode=(0 if load_status else 101), retmsg=load_status_msg,
                           data=load_status_info)
Beispiel #11
0
 def sync_task_status(job_id, component_name, task_id, role, party_id, initiator_party_id, initiator_role, task_info, update=False):
     """Push ``task_info`` to the executing party and the initiator.

     On a delivery failure the task is re-reported once with status FAILED
     (``update=True``), and that second pass always raises.
     """
     all_delivered = True
     endpoint = '/{}/schedule/{}/{}/{}/{}/{}/status'.format(
         API_VERSION, job_id, component_name, task_id, role, party_id)
     for dest_party_id in {party_id, initiator_party_id}:
         if party_id != initiator_party_id and dest_party_id == initiator_party_id:
             # do not pass the process id to the initiator
             task_info['f_run_ip'] = ''
         response = federated_api(job_id=job_id,
                                  method='POST',
                                  endpoint=endpoint,
                                  src_party_id=party_id,
                                  dest_party_id=dest_party_id,
                                  src_role=role,
                                  json_body=task_info,
                                  work_mode=RuntimeConfig.WORK_MODE)
         if response['retcode']:
             all_delivered = False
             schedule_logger().exception('job {} role {} party {} synchronize task status failed'.format(job_id, role, party_id))
             break
     if not all_delivered and not update:
         # report the sync failure itself as the task status, exactly once
         task_info['f_status'] = TaskStatus.FAILED
         TaskExecutor.sync_task_status(job_id, component_name, task_id, role, party_id, initiator_party_id,
                                       initiator_role, task_info, update=True)
     if update:
         raise Exception('job {} role {} party {} synchronize task status failed'.format(job_id, role, party_id))
Beispiel #12
0
 def distribute_job(job, roles, job_initiator):
     """Create the job on every party; on any failure mark the job FAILED everywhere and raise."""
     initiator_role = job_initiator['role']
     initiator_party_id = job_initiator['party_id']
     for role, party_ids in roles.items():
         job.f_role = role
         for party_id in party_ids:
             job.f_party_id = party_id
             is_initiator_party = (role == initiator_role
                                   and party_id == initiator_party_id)
             job.f_is_initiator = 1 if is_initiator_party else 0
             response_json = federated_api(job_id=job.f_job_id,
                                           method='POST',
                                           endpoint='/{}/schedule/{}/{}/{}/create'.format(
                                               API_VERSION, job.f_job_id,
                                               role, party_id),
                                           src_party_id=initiator_party_id,
                                           dest_party_id=party_id, src_role=initiator_role,
                                           json_body=job.to_json(),
                                           work_mode=job.f_work_mode)
             if response_json["retcode"]:
                 # creation failed on this party: broadcast FAILED and abort
                 job.f_status = JobStatus.FAILED
                 TaskScheduler.sync_job_status(job_id=job.f_job_id, roles=roles,
                                               work_mode=job.f_work_mode,
                                               initiator_party_id=initiator_party_id,
                                               initiator_role=initiator_role,
                                               job_info=job.to_json())
                 raise Exception(
                     "an error occurred while creating the job: role {} party_id {}".format(role, party_id)
                     + "\n" + str(response_json["retmsg"]))
Beispiel #13
0
 def distribute_job(job, roles, job_initiator):
     """Ask every party to create its local record of ``job``."""
     for role, party_ids in roles.items():
         job.f_role = role
         for party_id in party_ids:
             job.f_party_id = party_id
             same_party = (role == job_initiator['role']
                           and party_id == job_initiator['party_id'])
             job.f_is_initiator = int(same_party)
             create_endpoint = '/{}/job/{}/{}/{}/create'.format(
                 API_VERSION, job.f_job_id, role, party_id)
             federated_api(job_id=job.f_job_id,
                           method='POST',
                           endpoint=create_endpoint,
                           src_party_id=job_initiator['party_id'],
                           dest_party_id=party_id,
                           json_body=job.to_json(),
                           work_mode=job.f_work_mode)
Beispiel #14
0
    def stop(job_id, end_status=JobStatus.FAILED, component_name=''):
        """Kill or cancel a job as its initiator.

        :param job_id: job to stop
        :param end_status: terminal status to record; JobStatus.CANCELED
            switches the per-party command from "kill" to "cancel"
        :param component_name: optional single component to stop; when set,
            the job-level status broadcast is skipped
        :return: for cancel requests, True if any party accepted the cancel
        :raises Exception: when this party is not the job initiator, or the
            job does not exist at all
        """
        schedule_logger(job_id).info('get {} job {} {} command'.format("cancel" if end_status == JobStatus.CANCELED else "stop", job_id, component_name))
        jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
        cancel_success = False
        is_cancel = (end_status == JobStatus.CANCELED)
        if jobs:
            initiator_job = jobs[0]
            job_info = {'f_job_id': job_id, 'f_status': end_status}
            roles = json_loads(initiator_job.f_roles)
            job_work_mode = initiator_job.f_work_mode
            initiator_party_id = initiator_job.f_party_id

            # set status first
            if not component_name:
                TaskScheduler.sync_job_status(job_id=job_id, roles=roles, initiator_party_id=initiator_party_id,
                                              initiator_role=initiator_job.f_role,
                                              work_mode=job_work_mode,
                                              job_info=job_info)
            # then ask every party to cancel/kill its local execution
            for role, partys in roles.items():
                for party_id in partys:
                    response = federated_api(job_id=job_id,
                                             method='POST',
                                             endpoint='/{}/schedule/{}/{}/{}/{}'.format(
                                                 API_VERSION,
                                                 job_id,
                                                 role,
                                                 party_id,
                                                 "cancel" if is_cancel else "kill"
                                             ),
                                             src_party_id=initiator_party_id,
                                             dest_party_id=party_id,
                                             src_role=initiator_job.f_role,
                                             json_body={'job_initiator': {'party_id': initiator_job.f_party_id,
                                                                          'role': initiator_job.f_role},
                                                        'timeout': end_status == JobStatus.TIMEOUT,
                                                        'component_name': component_name
                                                        },
                                             work_mode=job_work_mode)
                    if response['retcode'] == 0:
                        cancel_success = True
                        schedule_logger(job_id).info(
                            'send {} {} {} job {} {} command successfully'.format(role, party_id, "cancel" if is_cancel else "kill",
                                                                                  job_id, component_name))
                        # one accepted cancel per role is enough
                        if is_cancel:
                            break
                    else:
                        schedule_logger(job_id).info(
                            'send {} {} {} job {} {} command failed: {}'.format(role, party_id, "cancel" if is_cancel else "kill",
                                                                                job_id, component_name, response['retmsg']))
            if is_cancel:
                return cancel_success
        else:
            jobs = job_utils.query_job(job_id=job_id)
            if jobs:
                raise Exception('Current role is not this job initiator')
            schedule_logger(job_id).info('send {} job {} {} command failed'.format("cancel" if is_cancel else "kill", job_id, component_name))
            raise Exception('can not found job: {}'.format(job_id))
Beispiel #15
0
 def task_command(cls,
                  job,
                  task,
                  command,
                  command_body=None,
                  need_user=False):
     """Send a task-level command to every party that runs the component.

     :param job: job whose runtime conf supplies the federated mode
     :param task: task identifying the component/task/version commanded
     :param command: command name appended to the /party/... endpoint
     :param command_body: optional JSON payload; a 'user_id' entry is added
         per destination when ``need_user`` is set
     :param need_user: attach the destination party's user id to the body
     :return: aggregated result via cls.return_federated_response
     """
     if command_body is None:
         # the old code mutated command_body before the None check and
         # crashed with TypeError when need_user=True and no body was given
         command_body = {}
     federated_response = {}
     job_parameters = job.f_runtime_conf_on_party["job_parameters"]
     dsl_parser = schedule_utils.get_job_dsl_parser(
         dsl=job.f_dsl,
         runtime_conf=job.f_runtime_conf_on_party,
         train_runtime_conf=job.f_train_runtime_conf)
     component = dsl_parser.get_component_info(
         component_name=task.f_component_name)
     component_parameters = component.get_role_parameters()
     for dest_role, parameters_on_partys in component_parameters.items():
         federated_response[dest_role] = {}
         for parameters_on_party in parameters_on_partys:
             dest_party_id = parameters_on_party.get('local',
                                                     {}).get('party_id')
             try:
                 if need_user:
                     command_body["user_id"] = job.f_user.get(
                         dest_role, {}).get(str(dest_party_id), "")
                     schedule_logger(job_id=job.f_job_id).info(
                         f'user:{job.f_user}, dest_role:{dest_role}, dest_party_id:{dest_party_id}'
                     )
                     schedule_logger(job_id=job.f_job_id).info(
                         f'command_body: {command_body}')
                 response = federated_api(
                     job_id=task.f_job_id,
                     method='POST',
                     endpoint='/party/{}/{}/{}/{}/{}/{}/{}'.format(
                         task.f_job_id, task.f_component_name,
                         task.f_task_id, task.f_task_version, dest_role,
                         dest_party_id, command),
                     src_party_id=job.f_initiator_party_id,
                     dest_party_id=dest_party_id,
                     src_role=job.f_initiator_role,
                     json_body=command_body if command_body else {},
                     federated_mode=job_parameters["federated_mode"])
                 federated_response[dest_role][dest_party_id] = response
             except Exception as e:
                 # record the failure as a synthetic error response
                 federated_response[dest_role][dest_party_id] = {
                     "retcode": RetCode.FEDERATED_ERROR,
                     "retmsg": "Federated schedule error, {}".format(str(e))
                 }
             if federated_response[dest_role][dest_party_id]["retcode"]:
                 schedule_logger(job_id=job.f_job_id).warning(
                     "an error occurred while {} the task to role {} party {}: \n{}"
                     .format(
                         command, dest_role, dest_party_id,
                         federated_response[dest_role][dest_party_id]
                         ["retmsg"]))
     return cls.return_federated_response(
         federated_response=federated_response)
Beispiel #16
0
 def finish_job(job_id, job_runtime_conf, stop=False):
     """Finalize a job on every party: optionally save pipelines, then clean up.

     :param stop: True when the job was stopped — pipeline saving is skipped.
     """
     parameters = job_runtime_conf['job_parameters']
     initiator = job_runtime_conf['initiator']
     encoded_model_id = base64_encode(parameters['model_id'])
     encoded_model_version = base64_encode(parameters['model_version'])
     role_map = job_runtime_conf['role']
     roles = ','.join(role_map.keys())
     party_ids = ','.join([','.join([str(j) for j in i]) for i in role_map.values()])
     for role, party_list in role_map.items():
         for party_id in party_list:
             if not stop:
                 # save pipeline: a stopped job produced no pipeline worth saving
                 federated_api(job_id=job_id,
                               method='POST',
                               endpoint='/{}/schedule/{}/{}/{}/{}/{}/save/pipeline'.format(
                                   API_VERSION, job_id, role, party_id,
                                   encoded_model_id, encoded_model_version),
                               src_party_id=initiator['party_id'],
                               dest_party_id=party_id,
                               src_role=initiator['role'],
                               json_body={},
                               work_mode=parameters['work_mode'])
             # clean up job resources on the party
             federated_api(job_id=job_id,
                           method='POST',
                           endpoint='/{}/schedule/{}/{}/{}/{}/{}/clean'.format(
                               API_VERSION, job_id, role, party_id,
                               roles, party_ids),
                           src_party_id=initiator['party_id'],
                           dest_party_id=party_id,
                           src_role=initiator['role'],
                           json_body={},
                           work_mode=parameters['work_mode'])
     schedule_logger(job_id, delete=True)
Beispiel #17
0
 def job_command(cls,
                 job,
                 command,
                 command_body=None,
                 dest_only_initiator=False,
                 specific_dest=None,
                 order_federated=False):
     """Send a job-level command to the selected destination parties and collect their responses."""
     federated_response = {}
     job_parameters = job.f_runtime_conf_on_party["job_parameters"]
     # choose destinations: the initiator only, an explicit map, or all job roles
     if dest_only_initiator:
         dest_partys = [(job.f_initiator_role, [job.f_initiator_party_id])]
         api_type = "initiator"
     elif specific_dest:
         dest_partys = specific_dest.items()
         api_type = "party"
     else:
         dest_partys = job.f_roles.items()
         api_type = "party"
     if order_federated:
         dest_partys = schedule_utils.federated_order_reset(
             dest_partys,
             scheduler_partys_info=[(job.f_initiator_role,
                                     job.f_initiator_party_id)])
     for dest_role, dest_party_ids in dest_partys:
         federated_response[dest_role] = {}
         role_responses = federated_response[dest_role]
         for dest_party_id in dest_party_ids:
             endpoint = '/{}/{}/{}/{}/{}'.format(
                 api_type, job.f_job_id, dest_role, dest_party_id, command)
             try:
                 role_responses[dest_party_id] = federated_api(
                     job_id=job.f_job_id,
                     method='POST',
                     endpoint=endpoint,
                     src_party_id=job.f_initiator_party_id,
                     dest_party_id=dest_party_id,
                     src_role=job.f_initiator_role,
                     json_body=command_body if command_body else {},
                     federated_mode=job_parameters["federated_mode"])
             except Exception as e:
                 schedule_logger(job_id=job.f_job_id).exception(e)
                 role_responses[dest_party_id] = {
                     "retcode": RetCode.FEDERATED_ERROR,
                     "retmsg": "Federated schedule error, {}".format(e)
                 }
             if role_responses[dest_party_id]["retcode"]:
                 schedule_logger(job_id=job.f_job_id).warning(
                     "an error occurred while {} the job to role {} party {}: \n{}"
                     .format(command, dest_role, dest_party_id,
                             role_responses[dest_party_id]["retmsg"]))
     return cls.return_federated_response(
         federated_response=federated_response)
Beispiel #18
0
 def align_task_parameters(job_id, job_parameters, job_initiator, job_args,
                           component, task_id):
     """Query each party for its input-table partition counts and align on the minimum.

     :param component: DSL component whose role parameters and inputs are queried
     :return: dict of extra task parameters; 'input_data_partition' stays 0
         when alignment is switched off or no party reported a count
     :raises Exception: when any party fails to answer the input/args query
     """
     parameters = component.get_role_parameters()
     component_name = component.get_name()
     extra_task_parameters = {
         'input_data_partition': 0
     }  # Large integers are not used
     for role, partys_parameters in parameters.items():
         for party_index in range(len(partys_parameters)):
             party_parameters = partys_parameters[party_index]
             if role in job_args:
                 party_job_args = job_args[role][party_index]['args']
             else:
                 party_job_args = {}
             dest_party_id = party_parameters.get('local',
                                                  {}).get('party_id')
             # alignment can be disabled per-job or by the global switch
             if job_parameters.get('align_task_input_data_partition',
                                   ALIGN_TASK_INPUT_DATA_PARTITION_SWITCH):
                 response = federated_api(
                     job_id=job_id,
                     method='POST',
                     endpoint='/{}/schedule/{}/{}/{}/{}/{}/input/args'.
                     format(API_VERSION, job_id, component_name, task_id,
                            role, dest_party_id),
                     src_party_id=job_initiator['party_id'],
                     dest_party_id=dest_party_id,
                     src_role=job_initiator['role'],
                     json_body={
                         'job_parameters': job_parameters,
                         'job_args': party_job_args,
                         'input': component.get_input()
                     },
                     work_mode=job_parameters['work_mode'])
                 if response['retcode'] == 0:
                     for input_data in response.get('data',
                                                    {}).get('data',
                                                            {}).values():
                         for data_table_info in input_data.values():
                             if data_table_info:
                                 partitions = data_table_info['partitions']
                                 # track the smallest partition count seen
                                 # across all parties (0 means "unset")
                                 if extra_task_parameters[
                                         'input_data_partition'] == 0 or partitions < extra_task_parameters[
                                             'input_data_partition']:
                                     extra_task_parameters[
                                         'input_data_partition'] = partitions
                 else:
                     raise Exception(
                         'job {} component {} align task parameters failed on {} {}'
                         .format(job_id, component_name, role,
                                 dest_party_id))
     return extra_task_parameters
Beispiel #19
0
 def tracker_command(cls, job, request_data, command, json_body=None):
     """Forward a tracker command for one component to the party that owns it."""
     job_parameters = job.f_runtime_conf_on_party["job_parameters"]
     endpoint = '/tracker/{}/{}/{}/{}/{}'.format(
         request_data['job_id'], request_data['component_name'],
         request_data['role'], request_data['party_id'], command)
     return federated_api(
         job_id=str(request_data['job_id']),
         method='POST',
         endpoint=endpoint,
         src_party_id=job.f_party_id,
         dest_party_id=request_data['party_id'],
         src_role=job.f_role,
         json_body=json_body if json_body else {},
         federated_mode=job_parameters["federated_mode"])
Beispiel #20
0
def start_proxy(role):
    """Forward the incoming request to another party under a fresh job id.

    ``marketplace`` requests go through ``proxy_api``; everything else is
    relayed with ``federated_api`` to the destination party named in the
    request's ``header`` section. Returns the downstream response as JSON.
    """
    request_config = request.json or request.form.to_dict()
    _job_id = job_utils.generate_job_id()
    if role in ['marketplace']:
        response = proxy_api(role, _job_id, request_config)
    else:
        # guard against a missing/None 'header' section: the original
        # chained .get('header').get(...) raised AttributeError on None
        header = request_config.get('header') or {}
        response = federated_api(job_id=_job_id,
                                 method='POST',
                                 endpoint='/forward/{}/do'.format(role),
                                 src_party_id=header.get('src_party_id'),
                                 dest_party_id=header.get('dest_party_id'),
                                 src_role=None,
                                 json_body=request_config,
                                 federated_mode=FederatedMode.MULTIPLE)
    return jsonify(response)
Beispiel #21
0
    def stop_job(job_id):
        """Stop a job from the initiator side.

        Looks up the initiator's record for *job_id*, marks the job FAILED on
        every party via ``sync_job_status``, then sends a federated ``kill``
        command to each (role, party). Raises when no initiator job record
        exists for the given id.
        """
        schedule_logger.info('get stop job {} command'.format(job_id))
        jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
        if not jobs:
            # nothing to stop: no job record where we are the initiator
            schedule_logger.info(
                'send stop job {} command failed'.format(job_id))
            raise Exception('can not found job: {}'.format(job_id))

        initiator_job = jobs[0]
        roles = json_loads(initiator_job.f_roles)
        work_mode = initiator_job.f_work_mode
        initiator_party_id = initiator_job.f_party_id

        # set status first, before issuing kill commands
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=roles,
            initiator_party_id=initiator_party_id,
            work_mode=work_mode,
            job_info={'f_job_id': job_id, 'f_status': JobStatus.FAILED})

        for role, partys in roles.items():
            for party_id in partys:
                response = federated_api(
                    job_id=job_id,
                    method='POST',
                    endpoint='/{}/job/{}/{}/{}/kill'.format(
                        API_VERSION, job_id, role, party_id),
                    src_party_id=initiator_party_id,
                    dest_party_id=party_id,
                    json_body={
                        'job_initiator': {
                            'party_id': initiator_job.f_party_id,
                            'role': initiator_job.f_role
                        }
                    },
                    work_mode=work_mode)
                if response['retcode'] == 0:
                    schedule_logger.info(
                        'send {} {} kill job {} command successfully'.
                        format(role, party_id, job_id))
                else:
                    schedule_logger.info(
                        'send {} {} kill job {} command failed: {}'.format(
                            role, party_id, job_id, response['retmsg']))
Beispiel #22
0
 def federated_command(cls, job_id, src_role, src_party_id, dest_role,
                       dest_party_id, endpoint, body, federated_mode,
                       federated_response):
     """Send one federated POST command and record the result per party.

     The remote response — or a synthesized FEDERATED_ERROR response when the
     transport call itself raises — is stored into
     ``federated_response[dest_role][dest_party_id]``; nothing is returned.
     The elapsed time of the call is logged at the end.
     """
     # timestamp before the call so elapsed time can be reported below
     st = base_utils.current_timestamp()
     log_msg = f"sending {endpoint} federated command"
     schedule_logger(job_id).info(start_log(msg=log_msg))
     try:
         response = federated_api(job_id=job_id,
                                  method='POST',
                                  endpoint=endpoint,
                                  src_role=src_role,
                                  src_party_id=src_party_id,
                                  dest_party_id=dest_party_id,
                                  json_body=body if body else {},
                                  federated_mode=federated_mode)
     except Exception as e:
         # transport-level failure: log the traceback and fabricate an error
         # response so the caller still gets an entry for this party
         schedule_logger(job_id=job_id).exception(e)
         response = {
             "retcode": RetCode.FEDERATED_ERROR,
             "retmsg": "Federated schedule error, {}".format(e)
         }
     if response["retcode"] != RetCode.SUCCESS:
         # NOT_EFFECTIVE / RUNNING responses are only warned about;
         # every other non-success retcode is logged as an error
         if response["retcode"] in [RetCode.NOT_EFFECTIVE, RetCode.RUNNING]:
             schedule_logger(job_id).warning(
                 warning_log(msg=log_msg,
                             role=dest_role,
                             party_id=dest_party_id))
         else:
             schedule_logger(job_id).error(
                 failed_log(msg=log_msg,
                            role=dest_role,
                            party_id=dest_party_id,
                            detail=response["retmsg"]))
     federated_response[dest_role][dest_party_id] = response
     et = base_utils.current_timestamp()
     schedule_logger(job_id).info(f"{log_msg} use {et - st} ms")
Beispiel #23
0
    def run_component(job_id, job_runtime_conf, job_parameters, job_initiator, job_args, dag, component):
        """Run one DAG component on every participating party, then recurse.

        Sends a federated ``run`` command for this component's task to each
        (role, party) listed in the component's role parameters, checks the
        resulting task and job status, syncs job progress to all parties and —
        on success — recursively runs every downstream component whose
        dependencies are satisfied.

        Returns:
            bool: True when this component and all downstream components ran
            successfully, False otherwise.
        """
        parameters = component.get_role_parameters()
        component_name = component.get_name()
        module_name = component.get_module()
        task_id = job_utils.generate_task_id(job_id=job_id, component_name=component_name)
        schedule_logger(job_id).info('job {} run component {}'.format(job_id, component_name))
        for role, partys_parameters in parameters.items():
            for party_index, party_parameters in enumerate(partys_parameters):
                # roles with no entry in job_args run with empty args
                if role in job_args:
                    party_job_args = job_args[role][party_index]['args']
                else:
                    party_job_args = {}
                dest_party_id = party_parameters.get('local', {}).get('party_id')

                response = federated_api(job_id=job_id,
                              method='POST',
                              endpoint='/{}/schedule/{}/{}/{}/{}/{}/run'.format(
                                  API_VERSION,
                                  job_id,
                                  component_name,
                                  task_id,
                                  role,
                                  dest_party_id),
                              src_party_id=job_initiator['party_id'],
                              dest_party_id=dest_party_id,
                              src_role=job_initiator['role'],
                              json_body={'job_parameters': job_parameters,
                                         'job_initiator': job_initiator,
                                         'job_args': party_job_args,
                                         'parameters': party_parameters,
                                         'module_name': module_name,
                                         'input': component.get_input(),
                                         'output': component.get_output(),
                                         'job_server': {'ip': get_lan_ip(), 'http_port': RuntimeConfig.HTTP_PORT}},
                              work_mode=job_parameters['work_mode'])
                # only authorization errors abort immediately here; other
                # failures surface later through the task status check
                if response['retcode']:
                    if 'not authorized' in response['retmsg']:
                        raise Exception('run component {} not authorized'.format(component_name))
        component_task_status = TaskScheduler.check_task_status(job_id=job_id, component=component)
        job_status = TaskScheduler.check_job_status(job_id)
        task_success = bool(component_task_status and job_status)
        schedule_logger(job_id).info(
            'job {} component {} run {}'.format(job_id, component_name, 'success' if task_success else 'failed'))
        # update progress on all parties
        TaskScheduler.sync_job_status(job_id=job_id, roles=job_runtime_conf['role'],
                                      work_mode=job_parameters['work_mode'],
                                      initiator_party_id=job_initiator['party_id'],
                                      initiator_role=job_initiator['role'],
                                      job_info=job_utils.update_job_progress(job_id=job_id, dag=dag,
                                                                             current_task_id=task_id).to_json())
        TaskScheduler.stop(job_id=job_id, component_name=component_name)
        if task_success:
            next_components = dag.get_next_components(component_name)
            schedule_logger(job_id).info('job {} component {} next components is {}'.format(
                job_id, component_name,
                [next_component.get_name() for next_component in next_components]))
            for next_component in next_components:
                try:
                    schedule_logger(job_id).info(
                        'job {} check component {} dependencies status'.format(job_id, next_component.get_name()))
                    dependencies_status = TaskScheduler.check_dependencies(job_id=job_id, dag=dag,
                                                                           component=next_component)
                    job_status = TaskScheduler.check_job_status(job_id)
                    schedule_logger(job_id).info(
                        'job {} component {} dependencies status is {}, job status is {}'.format(
                            job_id, next_component.get_name(), dependencies_status, job_status))
                    if dependencies_status and job_status:
                        run_status = TaskScheduler.run_component(job_id, job_runtime_conf, job_parameters,
                                                                 job_initiator, job_args, dag,
                                                                 next_component)
                    else:
                        run_status = False
                except Exception as e:
                    schedule_logger(job_id).exception(e)
                    run_status = False
                if not run_status:
                    return False
            return True
        else:
            # check_task_status returning None maps to a timeout; use `is None`
            # so a plain False status is not mistaken for a timeout
            if component_task_status is None:
                end_status = JobStatus.TIMEOUT
            else:
                end_status = JobStatus.FAILED
            TaskScheduler.stop(job_id=job_id, end_status=end_status)
            return False
Beispiel #24
0
def deploy():
    """Create a deployed child model version and distribute the deploy task.

    Reads ``model_id`` / ``model_version`` from the request, locates the
    stored model info, finds the entry belonging to the model's initiator
    (entries from FATE versions older than 1.5.0 are skipped), then sends
    ``/model/deploy/do`` to every arbiter / host / guest party from the
    training runtime conf. Returns an aggregated per-party status result.
    """
    request_data = request.json
    require_parameters = ['model_id', 'model_version']
    check_config(request_data, require_parameters)
    model_id = request_data.get("model_id")
    model_version = request_data.get("model_version")
    retcode, retmsg, model_info = model_utils.query_model_info_from_file(
        model_id=model_id, model_version=model_version, to_dict=True)
    if not model_info:
        raise Exception(
            f'Deploy model failed, no model {model_id} {model_version} found.')
    else:
        # scan stored entries for the initiator's copy of the model;
        # entries whose FATE version compares 'lt' 1.5.0 are skipped
        for key, value in model_info.items():
            version_check = model_utils.compare_version(
                value.get('f_fate_version'), '1.5.0')
            if version_check == 'lt':
                continue
            else:
                # the second-to-last path segment of `key` encodes
                # "<role>#<party_id>" — presumably a storage-key convention;
                # TODO confirm against the writer of model_info keys
                init_role = key.split('/')[-2].split('#')[0]
                init_party_id = key.split('/')[-2].split('#')[1]
                # initiator role/party: prefer the explicit fields, fall back
                # to the training runtime conf's initiator section
                model_init_role = value.get('f_initiator_role') if value.get(
                    'f_initiator_role') else value.get(
                        'f_train_runtime_conf', {}).get('initiator', {}).get(
                            'role', '')
                model_init_party_id = value.get(
                    'f_initiator_role_party_id') if value.get(
                        'f_initiator_role_party_id') else value.get(
                            'f_train_runtime_conf', {}).get(
                                'initiator', {}).get('party_id', '')
                if (init_role
                        == model_init_role) and (init_party_id
                                                 == str(model_init_party_id)):
                    # NOTE: after this break, `value`, `model_init_role` and
                    # `model_init_party_id` deliberately leak out of the loop
                    # and refer to the matched initiator entry
                    break
        else:
            # for/else: loop finished without break -> no usable entry found
            raise Exception(
                "Deploy model failed, can not found model of initiator role or the fate version of model is older than 1.5.0"
            )

        # distribute federated deploy task
        _job_id = job_utils.generate_job_id()
        request_data['child_model_version'] = _job_id

        initiator_party_id = model_init_party_id
        initiator_role = model_init_role
        request_data['initiator'] = {
            'role': initiator_role,
            'party_id': initiator_party_id
        }
        # aggregated result: per-role retcodes plus a 'detail' sub-dict with
        # retcode/retmsg per party
        deploy_status = True
        deploy_status_info = {}
        deploy_status_msg = 'success'
        deploy_status_info['detail'] = {}

        for role_name, role_partys in value.get("f_train_runtime_conf",
                                                {}).get('role', {}).items():
            # only these three roles receive the deploy command
            if role_name not in ['arbiter', 'host', 'guest']:
                continue
            deploy_status_info[role_name] = deploy_status_info.get(
                role_name, {})
            deploy_status_info['detail'][role_name] = {}
            adapter = JobRuntimeConfigAdapter(
                value.get("f_train_runtime_conf", {}))
            work_mode = adapter.get_job_work_mode()

            for _party_id in role_partys:
                # 'local' tells the receiving party which role/party it is
                request_data['local'] = {
                    'role': role_name,
                    'party_id': _party_id
                }
                try:
                    response = federated_api(
                        job_id=_job_id,
                        method='POST',
                        endpoint='/model/deploy/do',
                        src_party_id=initiator_party_id,
                        dest_party_id=_party_id,
                        src_role=initiator_role,
                        json_body=request_data,
                        federated_mode=FederatedMode.MULTIPLE
                        if work_mode else FederatedMode.SINGLE)
                    deploy_status_info[role_name][_party_id] = response[
                        'retcode']
                    detail = {_party_id: {}}
                    detail[_party_id]['retcode'] = response['retcode']
                    detail[_party_id]['retmsg'] = response['retmsg']
                    deploy_status_info['detail'][role_name].update(detail)
                    # any non-zero retcode fails the whole deploy
                    if response['retcode']:
                        deploy_status = False
                        deploy_status_msg = 'failed'
                except Exception as e:
                    stat_logger.exception(e)
                    deploy_status = False
                    deploy_status_msg = 'failed'
                    # 100 marks a party the command never reached
                    deploy_status_info[role_name][_party_id] = 100

        deploy_status_info['model_id'] = request_data['model_id']
        deploy_status_info['model_version'] = _job_id
        return get_json_result(retcode=(0 if deploy_status else 101),
                               retmsg=deploy_status_msg,
                               data=deploy_status_info)
Beispiel #25
0
def load_model():
    """Distribute a federated model-load task to every party of a model.

    When the request carries a ``job_id``, the initiator, role map and job
    parameters are rebuilt from the locally stored model (queried as guest);
    otherwise they must already be present in the request. Each non-arbiter
    party then receives ``/model/load/do`` via ``federated_api``, and an
    aggregated per-party status result is returned.
    """
    request_config = request.json
    if request_config.get('job_id', None):
        # job_id mode: rebuild initiator / role / job_parameters sections
        # from the stored model info
        retcode, retmsg, res_data = model_utils.query_model_info(
            model_version=request_config['job_id'], role='guest')
        if res_data:
            model_info = res_data[0]
            request_config['initiator'] = {}
            request_config['initiator']['party_id'] = str(
                model_info.get('f_initiator_party_id'))
            request_config['initiator']['role'] = model_info.get(
                'f_initiator_role')
            # prefer the runtime conf; fall back to the training conf
            runtime_conf = model_info.get(
                'f_runtime_conf', {}) if model_info.get(
                    'f_runtime_conf', {}) else model_info.get(
                        'f_train_runtime_conf', {})
            adapter = JobRuntimeConfigAdapter(runtime_conf)
            job_parameters = adapter.get_common_parameters().to_dict()
            request_config[
                'job_parameters'] = job_parameters if job_parameters else model_info.get(
                    'f_train_runtime_conf', {}).get('job_parameters')
            roles = runtime_conf.get('role')
            request_config['role'] = roles if roles else model_info.get(
                'f_train_runtime_conf', {}).get('role')
            # normalize party ids to strings in place
            for key, value in request_config['role'].items():
                for i, v in enumerate(value):
                    value[i] = str(v)
            request_config.pop('job_id')
        else:
            return get_json_result(
                retcode=101,
                retmsg="model with version {} can not be found in database. "
                "Please check if the model version is valid.".format(
                    request_config.get('job_id')))
    _job_id = job_utils.generate_job_id()
    initiator_party_id = request_config['initiator']['party_id']
    initiator_role = request_config['initiator']['role']
    publish_model.generate_publish_model_info(request_config)
    # aggregated result: per-role retcodes plus a 'detail' sub-dict
    load_status = True
    load_status_info = {}
    load_status_msg = 'success'
    load_status_info['detail'] = {}
    # derive federated_mode from work_mode when the caller did not set it
    if "federated_mode" not in request_config['job_parameters']:
        if request_config["job_parameters"][
                "work_mode"] == WorkMode.STANDALONE:
            request_config['job_parameters'][
                "federated_mode"] = FederatedMode.SINGLE
        elif request_config["job_parameters"]["work_mode"] == WorkMode.CLUSTER:
            request_config['job_parameters'][
                "federated_mode"] = FederatedMode.MULTIPLE
    for role_name, role_partys in request_config.get("role").items():
        # arbiter parties are skipped (presumably no model to load — confirm)
        if role_name == 'arbiter':
            continue
        load_status_info[role_name] = load_status_info.get(role_name, {})
        load_status_info['detail'][role_name] = {}
        for _party_id in role_partys:
            # 'local' tells the receiving party which role/party it is
            request_config['local'] = {
                'role': role_name,
                'party_id': _party_id
            }
            try:
                response = federated_api(
                    job_id=_job_id,
                    method='POST',
                    endpoint='/model/load/do',
                    src_party_id=initiator_party_id,
                    dest_party_id=_party_id,
                    src_role=initiator_role,
                    json_body=request_config,
                    federated_mode=request_config['job_parameters']
                    ['federated_mode'])
                load_status_info[role_name][_party_id] = response['retcode']
                detail = {_party_id: {}}
                detail[_party_id]['retcode'] = response['retcode']
                detail[_party_id]['retmsg'] = response['retmsg']
                load_status_info['detail'][role_name].update(detail)
                # any non-zero retcode fails the whole load
                if response['retcode']:
                    load_status = False
                    load_status_msg = 'failed'
            except Exception as e:
                stat_logger.exception(e)
                load_status = False
                load_status_msg = 'failed'
                # 100 marks a party the command never reached
                load_status_info[role_name][_party_id] = 100
    return get_json_result(job_id=_job_id,
                           retcode=(0 if load_status else 101),
                           retmsg=load_status_msg,
                           data=load_status_info)
Beispiel #26
0
def migrate_model_process():
    """Distribute a federated model-migration task to every execute party.

    Validates the migration request, rejects no-op migrations (identical old
    and new role maps), builds a per-party ``local`` mapping from the old
    party id to the migrated one, then sends ``/model/migrate/do`` to each
    party listed in ``execute_party``. Returns an aggregated per-party
    status result.
    """
    request_config = request.json
    _job_id = job_utils.generate_job_id()
    initiator_party_id = request_config['migrate_initiator']['party_id']
    initiator_role = request_config['migrate_initiator']['role']
    # default the unified target model version to the new job id
    if not request_config.get("unify_model_version"):
        request_config["unify_model_version"] = _job_id
    migrate_status = True
    migrate_status_info = {}
    migrate_status_msg = 'success'
    migrate_status_info['detail'] = {}

    require_arguments = [
        "migrate_initiator", "role", "migrate_role", "model_id",
        "model_version", "execute_party", "job_parameters"
    ]
    check_config(request_config, require_arguments)

    try:
        # migrating to an identical role map is a no-op — refuse it
        if compare_roles(request_config.get("migrate_role"),
                         request_config.get("role")):
            return get_json_result(
                retcode=100,
                retmsg=
                "The config of previous roles is the same with that of migrate roles. "
                "There is no need to migrate model. Migration process aborting."
            )
    except Exception as e:
        return get_json_result(retcode=100, retmsg=str(e))

    local_template = {"role": "", "party_id": "", "migrate_party_id": ""}

    # res_dict[role][old_party_id] -> {'role', 'party_id', 'migrate_party_id'}
    res_dict = {}

    for role_name, role_partys in request_config.get("migrate_role").items():
        for offset, party_id in enumerate(role_partys):
            local_res = deepcopy(local_template)
            local_res["role"] = role_name
            # old and new party ids are matched positionally
            local_res["party_id"] = request_config.get("role").get(
                role_name)[offset]
            local_res["migrate_party_id"] = party_id
            if not res_dict.get(role_name):
                res_dict[role_name] = {}
            res_dict[role_name][local_res["party_id"]] = local_res

    for role_name, role_partys in request_config.get("execute_party").items():
        migrate_status_info[role_name] = migrate_status_info.get(role_name, {})
        migrate_status_info['detail'][role_name] = {}
        for party_id in role_partys:
            # tell the receiving party which role/party mapping applies to it
            request_config["local"] = res_dict.get(role_name).get(party_id)
            try:
                response = federated_api(
                    job_id=_job_id,
                    method='POST',
                    endpoint='/model/migrate/do',
                    src_party_id=initiator_party_id,
                    dest_party_id=party_id,
                    src_role=initiator_role,
                    json_body=request_config,
                    federated_mode=request_config['job_parameters']
                    ['federated_mode'])
                migrate_status_info[role_name][party_id] = response['retcode']
                detail = {party_id: {}}
                detail[party_id]['retcode'] = response['retcode']
                detail[party_id]['retmsg'] = response['retmsg']
                migrate_status_info['detail'][role_name].update(detail)
                # bug fix: a party-level failure must fail the whole migration,
                # matching the behaviour of the deploy/load endpoints
                if response['retcode']:
                    migrate_status = False
                    migrate_status_msg = 'failed'
            except Exception as e:
                stat_logger.exception(e)
                migrate_status = False
                migrate_status_msg = 'failed'
                # 100 marks a party the command never reached
                migrate_status_info[role_name][party_id] = 100
    return get_json_result(job_id=_job_id,
                           retcode=(0 if migrate_status else 101),
                           retmsg=migrate_status_msg,
                           data=migrate_status_info)
Beispiel #27
0
    def run_component(job_id, job_runtime_conf, job_parameters, job_initiator,
                      job_args, dag, component):
        """Trigger one DAG component on all parties, then recurse downstream.

        Sends a federated ``run`` command per (role, party) declared in the
        component's role parameters, checks the component task status, syncs
        job progress to every party and — on success — recursively runs each
        downstream component whose dependencies are satisfied.

        Returns:
            bool: True when this component and all downstream components
            succeeded, False otherwise.
        """
        parameters = component.get_role_parameters()
        component_name = component.get_name()
        module_name = component.get_module()
        task_id = job_utils.generate_task_id(job_id=job_id,
                                             component_name=component_name)
        schedule_logger.info('job {} run component {}'.format(
            job_id, component_name))
        for role, partys_parameters in parameters.items():
            for party_index in range(len(partys_parameters)):
                party_parameters = partys_parameters[party_index]
                # roles with no entry in job_args run with empty args
                if role in job_args:
                    party_job_args = job_args[role][party_index]['args']
                else:
                    party_job_args = {}
                dest_party_id = party_parameters.get('local',
                                                     {}).get('party_id')

                # NOTE(review): the response is discarded here; failures only
                # surface later via check_task_status
                federated_api(job_id=job_id,
                              method='POST',
                              endpoint='/{}/job/{}/{}/{}/{}/{}/run'.format(
                                  API_VERSION, job_id, component_name, task_id,
                                  role, dest_party_id),
                              src_party_id=job_initiator['party_id'],
                              dest_party_id=dest_party_id,
                              json_body={
                                  'job_parameters': job_parameters,
                                  'job_initiator': job_initiator,
                                  'job_args': party_job_args,
                                  'parameters': party_parameters,
                                  'module_name': module_name,
                                  'input': component.get_input(),
                                  'output': component.get_output()
                              },
                              work_mode=job_parameters['work_mode'])
        component_task_status = TaskScheduler.check_task_status(
            job_id=job_id, component=component)
        if component_task_status:
            task_success = True
        else:
            task_success = False
        schedule_logger.info('job {} component {} run {}'.format(
            job_id, component_name, 'success' if task_success else 'failed'))
        # update progress
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=job_runtime_conf['role'],
            work_mode=job_parameters['work_mode'],
            initiator_party_id=job_initiator['party_id'],
            job_info=job_utils.update_job_progress(
                job_id=job_id, dag=dag, current_task_id=task_id).to_json())
        if task_success:
            next_components = dag.get_next_components(component_name)
            schedule_logger.info(
                'job {} component {} next components is {}'.format(
                    job_id, component_name, [
                        next_component.get_name()
                        for next_component in next_components
                    ]))
            for next_component in next_components:
                try:
                    schedule_logger.info(
                        'job {} check component {} dependencies status'.format(
                            job_id, next_component.get_name()))
                    dependencies_status = TaskScheduler.check_dependencies(
                        job_id=job_id, dag=dag, component=next_component)
                    schedule_logger.info(
                        'job {} component {} dependencies status is {}'.format(
                            job_id, next_component.get_name(),
                            dependencies_status))
                    if dependencies_status:
                        # depth-first: a downstream failure propagates up as False
                        run_status = TaskScheduler.run_component(
                            job_id, job_runtime_conf, job_parameters,
                            job_initiator, job_args, dag, next_component)
                    else:
                        run_status = False
                except Exception as e:
                    # NOTE(review): logs the exception at info level without a
                    # traceback; consider schedule_logger.exception here
                    schedule_logger.info(e)
                    run_status = False
                if not run_status:
                    return False
            return True
        else:
            return False