Example #1
def import_offline_feature():
    eggroll.init(job_id=generate_job_id(), mode=WORK_MODE)
    request_data = request.json
    try:
        if not request_data.get("jobId"):
            return get_json_result(status=2, msg="no job id")
        job_id = request_data.get("jobId")
        job_data = query_job_by_id(job_id=job_id)
        if not job_data:
            return get_json_result(status=3,
                                   msg="can not found this job id: %s" %
                                   request_data.get("jobId", ""))
        response = GetFeature.import_data(request_data,
                                          json.loads(job_data[0]["config"]))
        if response.get("status", 1) == 0:
            update_job_by_id(job_id=job_id,
                             update_data={
                                 "status": "success",
                                 "end_date": datetime.datetime.now()
                             })
            return get_json_result()
        else:
            return get_json_result(status=1,
                                   msg="request offline feature error: %s" %
                                   response.get("msg", ""))
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1,
                               msg="request offline feature error: %s" % e)
Example #2
def do_load_model():
    request_data = request.json
    try:
        request_data["servings"] = server_conf.get("servers", {}).get("servings", [])
        publish_model.load_model(config_data=request_data)
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1, msg="load model error: %s" % e)
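Every handler in these examples returns through a shared get_json_result helper, called both positionally (e.g. get_json_result(0, "success")) and with status=/msg= keywords. The helper itself is not part of the snippets; a minimal sketch of what such a wrapper might look like, assuming a Flask app and a plain status/msg/data envelope:

# Hypothetical sketch only: it mirrors the call patterns visible in the examples;
# the real helper may add more fields.
from flask import jsonify

def get_json_result(status=0, msg='success', data=None):
    # status 0 means success; non-zero values carry an error code
    return jsonify({'status': status, 'msg': msg, 'data': data})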
Example #3
def submit_job():
    _data = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, _data))
    try:
        push_into_job_queue(job_id=_job_id, config=_data)
        return get_json_result(0, "success, job_id {}".format(_job_id))
    except Exception as e:
        return get_json_result(1, "failed, error: {}".format(e))
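Several handlers mint a job id with generate_job_id(), which is also not shown in these snippets. Example #11 derives an id from uuid.uuid1().hex, so one purely illustrative implementation is a timestamp plus a UUID suffix:

# Illustrative only: the real generate_job_id() may differ.
import datetime
import uuid

def generate_job_id():
    # e.g. "20190101120000_1f2e3d4c" -- sortable by creation time, unique per call
    return '{}_{}'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S'),
                          uuid.uuid1().hex[:8])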
Example #4
def import_id():
    eggroll.init(job_id=generate_job_id(), mode=WORK_MODE)
    request_data = request.json
    table_name_space = "id_library"
    try:
        id_library_info = eggroll.table("info",
                                        table_name_space,
                                        partition=10,
                                        create_if_missing=True,
                                        error_if_exist=False)
        # rangeStart == 0 marks the first chunk: create a new DTable id and cache it
        if request_data.get("rangeStart") == 0:
            data_id = generate_job_id()
            id_library_info.put("tmp_data_id", data_id)
        else:
            data_id = id_library_info.get("tmp_data_id")
        data_table = eggroll.table(data_id,
                                   table_name_space,
                                   partition=50,
                                   create_if_missing=True,
                                   error_if_exist=False)
        for i in request_data.get("ids", []):
            data_table.put(i, "")
        # the last chunk is detected by total - rangeEnd == 1
        if request_data.get("rangeEnd") and request_data.get("total") and (
                request_data.get("total") - request_data.get("rangeEnd") == 1):
            # end
            new_id_count = data_table.count()
            if new_id_count == request_data["total"]:
                id_library_info.put(
                    data_id,
                    json.dumps({
                        "salt": request_data.request("salt"),
                        "saltMethod": request_data.request("saltMethod")
                    }))
                old_data_id = id_library_info.request("use_data_id")
                id_library_info.put("use_data_id", data_id)
                logger.info(
                    "import id success, dtable name is {}, namespace is {}",
                    data_id, table_name_space)

                # TODO: destroying the old DTable should be done under a lock
                if old_data_id:
                    old_data_table = eggroll.table(old_data_id,
                                                   table_name_space,
                                                   partition=50,
                                                   create_if_missing=True,
                                                   error_if_exist=False)
                    old_data_table.destroy()
                    id_library_info.delete(old_data_id)
            else:
                data_table.destroy()
                return get_json_result(
                    2, "the actual amount of data is not equal to total.")
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(1, "import error.")
Example #5
def query_model_version_history():
    request_data = request.json
    try:
        config = file_utils.load_json_conf(request_data.get("config_path"))
        eggroll.init(mode=WORK_MODE)
        history = version_history(data_table_namespace=config.get("namespace"))
        return get_json_result(msg=json.dumps(history))
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1, msg="query model version history error: %s" % e)
Example #6
def download_data(data_func):
    _data = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, _data))
    _job_dir = get_job_directory(_job_id)
    os.makedirs(_job_dir, exist_ok=True)
    _download_module = os.path.join(file_utils.get_project_base_directory(),
                                    "arch/api/utils/download.py")
    _upload_module = os.path.join(file_utils.get_project_base_directory(),
                                  "arch/api/utils/upload.py")

    if data_func == "download":
        _module = _download_module
    else:
        _module = _upload_module

    try:
        if data_func == "download":
            progs = [
                "python3", _module, "-j", _job_id, "-c",
                os.path.abspath(_data.get("config_path"))
            ]
        else:
            progs = [
                "python3", _module, "-c",
                os.path.abspath(_data.get("config_path"))
            ]

        logger.info('Starting progs: {}'.format(progs))

        std_log = open(os.path.join(_job_dir, 'std.log'), 'w')
        task_pid_path = os.path.join(_job_dir, 'pids')

        if os.name == 'nt':
            startupinfo = subprocess.STARTUPINFO()
            startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = subprocess.SW_HIDE
        else:
            startupinfo = None
        p = subprocess.Popen(progs,
                             stdout=std_log,
                             stderr=std_log,
                             startupinfo=startupinfo)

        os.makedirs(task_pid_path, exist_ok=True)
        with open(os.path.join(task_pid_path, data_func + ".pid"), 'w') as f:
            f.truncate()
            f.write(str(p.pid) + "\n")
            f.flush()

        return get_json_result(0, "success, job_id {}".format(_job_id))
    except Exception as e:
        logger.exception(e)
        return get_json_result(-104, "failed, job_id {}".format(_job_id))
Example #7
def publish_model_online():
    request_data = request.json
    try:
        config = file_utils.load_json_conf(request_data.get("config_path"))
        if not config.get('servings'):
            # get my party all servings
            config['servings'] = SERVINGS
        publish_model.publish_online(config_data=config)
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1, msg="publish model error: %s" % e)
Example #8
def stop_workflow(job_id):
    _job_dir = get_job_directory(job_id)
    task_pid_path = os.path.join(_job_dir, 'pids')
    if os.path.isdir(task_pid_path):
        for pid_file in os.listdir(task_pid_path):
            try:
                if not pid_file.endswith('.pid'):
                    continue
                with open(os.path.join(task_pid_path, pid_file), 'r') as f:
                    pids = f.read().split('\n')
                    for pid in pids:
                        try:
                            if len(pid) == 0:
                                continue
                            logger.debug(
                                "terminating process pid:{} {}".format(
                                    pid, pid_file))
                            p = psutil.Process(int(pid))
                            for child in p.children(recursive=True):
                                child.kill()
                            p.kill()
                        except psutil.NoSuchProcess:
                            continue
            except Exception as e:
                logger.exception("error")
                continue
        update_job_by_id(job_id=job_id,
                         update_data={
                             "status": "failed",
                             "set_status": "failed"
                         })
        pop_from_job_queue(job_id=job_id)
    return get_json_result()
Example #9
def run_job(self, job_id, config):
    default_runtime_dict = file_utils.load_json_conf('workflow/conf/default_runtime_conf.json')
    setting_conf = file_utils.load_json_conf('workflow/conf/setting_conf.json')
    _job_dir = get_job_directory(job_id=job_id)
    os.makedirs(_job_dir, exist_ok=True)
    ParameterOverride.override_parameter(default_runtime_dict, setting_conf, config, _job_dir)
    logger.info('job_id {} parameters overridden, job dir {}'.format(job_id, _job_dir))
    channel, stub = get_proxy_data_channel()
    for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**', 'runtime_conf.json'), recursive=True):
        runtime_conf = file_utils.load_json_conf(os.path.abspath(runtime_conf_path))
        _role = runtime_conf['local']['role']
        _party_id = runtime_conf['local']['party_id']
        _method = 'POST'
        _module = runtime_conf['module']
        _url = '/workflow/{}/{}/{}'.format(job_id, _module, _role)
        _packet = wrap_grpc_packet(runtime_conf, _method, _url, _party_id, job_id)
        logger.info(
            'Starting workflow job_id:{} party_id:{} role:{} method:{} url:{}'.format(
                job_id, _party_id, _role, _method, _url))
        try:
            _return = stub.unaryCall(_packet)
            logger.info("Grpc unary response: {}".format(_return))
        except grpc.RpcError as e:
            msg = 'job_id:{} party_id:{} role:{} method:{} url:{} Failed to start workflow'.format(
                job_id, _party_id, _role, _method, _url)
            logger.exception(msg)
            return get_json_result(-101, 'UnaryCall submit to remote manager failed')
Example #10
def update_job(job_id):
    request_data = request.json
    update_job_by_id(job_id=job_id, update_data={"status": request_data.get("status")})
    update_job_queue(job_id=job_id, update_data={"status": request_data.get("status")})
    if request_data.get("status") in ["failed", "deleted"]:
        stop_job(job_id=job_id)
    if request_data.get("status") in ["failed", "deleted", "success"]:
        pop_from_job_queue(job_id=job_id)
    return get_json_result()
Example #11
def request_offline_feature():
    request_data = request.json
    try:
        job_id = uuid.uuid1().hex
        response = GetFeature.request(job_id, request_data)
        if response.get("status", 1) == 0:
            job_data = dict()
            job_data.update(request_data)
            job_data["begin_date"] = datetime.datetime.now()
            job_data["status"] = "running"
            job_data["config"] = json.dumps(request_data)
            save_job_info(job_id=job_id, **job_data)
            return get_json_result()
        else:
            return get_json_result(status=1,
                                   msg="request offline feature error: %s" %
                                   response.get("msg", ""))
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1,
                               msg="request offline feature error: %s" % e)
Example #12
def stop_job(job_id):
    _job_dir = get_job_directory(job_id)
    for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**', 'runtime_conf.json'), recursive=True):
        runtime_conf = file_utils.load_json_conf(os.path.abspath(runtime_conf_path))
        _role = runtime_conf['local']['role']
        _party_id = runtime_conf['local']['party_id']
        _url = '/workflow/{}'.format(job_id)
        _method = 'DELETE'
        _packet = wrap_grpc_packet({}, _method, _url, _party_id, job_id)
        channel, stub = get_proxy_data_channel()
        try:
            _return = stub.unaryCall(_packet)
            logger.info("Grpc unary response: {}".format(_return))
        except grpc.RpcError as e:
            msg = 'job_id:{} party_id:{} role:{} method:{} url:{} Failed to stop workflow'.format(
                job_id, _party_id, _role, _method, _url)
            logger.exception(msg)
            return get_json_result(-101, 'UnaryCall stop to remote manager failed')
    return get_json_result()
Example #13
def load_model():
    config = file_utils.load_json_conf(request.json.get("config_path"))
    _job_id = generate_job_id()
    channel, stub = get_proxy_data_channel()
    for _party_id in config.get("party_ids"):
        config['my_party_id'] = _party_id
        _method = 'POST'
        _url = '/model/load/do'
        _packet = wrap_grpc_packet(config, _method, _url, _party_id, _job_id)
        logger.info(
            'Starting load model job_id:{} party_id:{} method:{} url:{}'.format(_job_id, _party_id, _method, _url))
        try:
            _return = stub.unaryCall(_packet)
            logger.info("Grpc unary response: {}".format(_return))
        except grpc.RpcError as e:
            msg = 'job_id:{} party_id:{} method:{} url:{} Failed to start load model'.format(_job_id,
                                                                                             _party_id,
                                                                                             _method,
                                                                                             _url)
            logger.exception(msg)
            return get_json_result(-101, 'UnaryCall submit to remote manager failed')
    return get_json_result()
Example #14
def start_workflow(job_id, module, role):
    _data = request.json
    _job_dir = get_job_directory(job_id)
    _party_id = str(_data['local']['party_id'])
    _method = _data['WorkFlowParam']['method']
    conf_path_dir = os.path.join(_job_dir, _method, module, role, _party_id)
    os.makedirs(conf_path_dir, exist_ok=True)
    conf_file_path = os.path.join(conf_path_dir, 'runtime_conf.json')
    with open(conf_file_path, 'w+') as f:
        f.truncate()
        f.write(json.dumps(_data, indent=4))
        f.flush()
    if os.name == 'nt':
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
    else:
        startupinfo = None
    task_pid_path = os.path.join(_job_dir, 'pids')
    std_log = open(os.path.join(_job_dir, role + '.std.log'), 'w')

    progs = [
        "python3",
        os.path.join(file_utils.get_project_base_directory(),
                     _data['CodePath']), "-j", job_id, "-c",
        os.path.abspath(conf_file_path)
    ]
    logger.info('Starting progs: {}'.format(" ".join(progs)))

    p = subprocess.Popen(progs,
                         stdout=std_log,
                         stderr=std_log,
                         startupinfo=startupinfo)
    os.makedirs(task_pid_path, exist_ok=True)
    with open(os.path.join(task_pid_path, role + ".pid"), 'w') as f:
        f.truncate()
        f.write(str(p.pid) + "\n")
        f.flush()

    job_data = dict()
    job_data["begin_date"] = datetime.datetime.now()
    job_data["status"] = "ready"
    with open(conf_file_path) as fr:
        config = json.load(fr)
    job_data.update(config)
    job_data["my_role"] = config.get("local", {}).get("role")
    save_job_info(job_id=job_id, **job_data)
    update_job_queue(job_id=job_id, update_data={"status": "ready"})
    return get_json_result(msg="success, pid is %s" % p.pid)