Esempio n. 1
0
def import_id():
    """Store one batch of ids from the JSON request body in the id library.

    The body is read with the keys ``rangeStart``, ``rangeEnd``, ``total``,
    ``ids`` and, for the final batch, ``salt`` / ``saltMethod``.

    * When ``rangeStart`` is 0 a new table name is generated and remembered
      under ``"tmp_data_id"`` in the ``"info"`` table; otherwise the
      remembered name is reused.
    * Every id is written as a key with an empty-string value.
    * When ``total - rangeEnd == 1`` the batch is treated as the last one:
      the table's count is compared with ``total``. On a match the salt
      settings are stored under the table name, ``"use_data_id"`` is switched
      to the new table and the previously used table is destroyed. On a
      mismatch the new table is destroyed and status 2 is returned.

    Returns:
        ``get_json_result()`` on success, ``get_json_result(2, ...)`` when
        the stored count differs from ``total``, or
        ``get_json_result(1, "import error.")`` when anything raised.
    """
    eggroll.init(job_id=generate_job_id(), mode=WORK_MODE)
    request_data = request.json
    table_name_space = "id_library"
    try:
        id_library_info = eggroll.table("info",
                                        table_name_space,
                                        partition=10,
                                        create_if_missing=True,
                                        error_if_exist=False)
        if request_data.get("rangeStart") == 0:
            data_id = generate_job_id()
            id_library_info.put("tmp_data_id", data_id)
        else:
            data_id = id_library_info.get("tmp_data_id")
        data_table = eggroll.table(data_id,
                                   table_name_space,
                                   partition=50,
                                   create_if_missing=True,
                                   error_if_exist=False)
        for i in request_data.get("ids", []):
            data_table.put(i, "")

        range_end = request_data.get("rangeEnd")
        total = request_data.get("total")
        # Compare against None rather than relying on truthiness: a
        # single-id upload legitimately has rangeEnd == 0.
        is_last_batch = (range_end is not None and total is not None
                         and total - range_end == 1)
        if is_last_batch:
            new_id_count = data_table.count()
            if new_id_count != total:
                data_table.destroy()
                return get_json_result(
                    2, "the actual amount of data is not equal to total.")

            id_library_info.put(
                data_id,
                json.dumps({
                    "salt": request_data.get("salt"),
                    "saltMethod": request_data.get("saltMethod")
                }))
            old_data_id = id_library_info.get("use_data_id")
            id_library_info.put("use_data_id", data_id)
            logger.info(
                "import id success, dtable name is {}, namespace is {}".format(
                    data_id, table_name_space))

            # TODO: destroy DTable, should be use a lock
            # Nothing to clean up when no table was in use before
            # (presumably the lookup yields a falsy value on the very first
            # import - confirm against the table API).
            if old_data_id:
                old_data_table = eggroll.table(old_data_id,
                                               table_name_space,
                                               partition=50,
                                               create_if_missing=True,
                                               error_if_exist=False)
                old_data_table.destroy()
                id_library_info.delete(old_data_id)
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(1, "import error.")
Esempio n. 2
0
def import_offline_feature():
    """Import offline feature data for the job named in the request body.

    The JSON body must carry ``jobId``. The job is looked up with
    ``query_job_by_id``; its stored ``config`` (a JSON string) is decoded and
    passed, together with the request body, to ``GetFeature.import_data``.
    When that call reports status 0 the job record is updated to
    ``"success"`` with the current time as ``end_date``.

    Returns:
        ``get_json_result()`` on success; status 2 when ``jobId`` is missing
        or empty, status 3 when the job cannot be found, status 1 when the
        import reports a non-zero status or anything raised.
    """
    eggroll.init(job_id=generate_job_id(), mode=WORK_MODE)
    payload = request.json
    try:
        job_id = payload.get("jobId")
        if not job_id:
            return get_json_result(status=2, msg="no job id")

        job_rows = query_job_by_id(job_id=job_id)
        if not job_rows:
            not_found = "can not found this job id: %s" % job_id
            return get_json_result(status=3, msg=not_found)

        stored_config = json.loads(job_rows[0]["config"])
        response = GetFeature.import_data(payload, stored_config)

        # Guard clause: anything but status 0 is reported as a failure.
        if response.get("status", 1) != 0:
            failure = "request offline feature error: %s" % response.get(
                "msg", "")
            return get_json_result(status=1, msg=failure)

        finished = {
            "status": "success",
            "end_date": datetime.datetime.now()
        }
        update_job_by_id(job_id=job_id, update_data=finished)
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1,
                               msg="request offline feature error: %s" % e)
Esempio n. 3
0
def submit_job():
    """Queue the posted job configuration under a newly generated job id.

    Returns:
        ``get_json_result(0, ...)`` with the job id in the message when the
        configuration was pushed onto the job queue, or
        ``get_json_result(1, ...)`` carrying the error text when anything
        raised.
    """
    _data = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, _data))
    try:
        push_into_job_queue(job_id=_job_id, config=_data)
        return get_json_result(0, "success, job_id {}".format(_job_id))
    except Exception as e:
        # Keep the traceback in the server log; the caller only receives the
        # exception text in the response message.
        logger.exception(e)
        return get_json_result(1, "failed, error: {}".format(e))
Esempio n. 4
0
def download_data(data_func):
    """Launch the download or upload helper script as a background process.

    A job id and job directory are created, the helper script
    (``arch/api/utils/download.py`` when ``data_func == "download"``,
    otherwise ``arch/api/utils/upload.py``) is started with ``python3`` and
    the ``config_path`` taken from the JSON request body, its stdout/stderr
    are redirected to ``std.log`` in the job directory, and its pid is
    written to ``pids/<data_func>.pid``.

    Args:
        data_func: ``"download"`` selects the download script (which also
            receives ``-j <job_id>``); any other value selects upload.

    Returns:
        ``get_json_result(0, ...)`` once the process has been started, or
        ``get_json_result(-104, ...)`` when anything raised.
    """
    _data = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, _data))
    _job_dir = get_job_directory(_job_id)
    os.makedirs(_job_dir, exist_ok=True)

    is_download = data_func == "download"
    _script = ("arch/api/utils/download.py" if is_download
               else "arch/api/utils/upload.py")
    _module = os.path.join(file_utils.get_project_base_directory(), _script)

    try:
        progs = ["python3", _module]
        if is_download:
            progs += ["-j", _job_id]
        progs += ["-c", os.path.abspath(_data.get("config_path"))]

        logger.info('Starting progs: {}'.format(progs))

        task_pid_path = os.path.join(_job_dir, 'pids')

        if os.name == 'nt':
            # Hide the console window of the child process on Windows.
            startupinfo = subprocess.STARTUPINFO()
            startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = subprocess.SW_HIDE
        else:
            startupinfo = None

        # The child receives its own copy of the log handle; closing ours
        # right after the spawn avoids leaking one descriptor per request.
        with open(os.path.join(_job_dir, 'std.log'), 'w') as std_log:
            p = subprocess.Popen(progs,
                                 stdout=std_log,
                                 stderr=std_log,
                                 startupinfo=startupinfo)

        os.makedirs(task_pid_path, exist_ok=True)
        with open(os.path.join(task_pid_path, data_func + ".pid"), 'w') as f:
            f.write(str(p.pid) + "\n")

        return get_json_result(0, "success, job_id {}".format(_job_id))
    except Exception as e:
        logger.exception(e)
        return get_json_result(-104, "failed, job_id {}".format(_job_id))
Esempio n. 5
0
def load_model():
    """Ask every party listed in the request's ``role`` map to load a model.

    The party ids of all roles are merged into one set so each party is
    contacted once, with a POST to ``/model/load/do`` carrying the unchanged
    request body. The per-party status and message are not inspected.

    Returns:
        ``get_json_result(job_id=...)`` with the newly generated job id.
    """
    model_request = request.json
    job_id = generate_job_id()

    party_ids = set()
    for role_members in model_request.get('role').values():
        party_ids |= set(role_members)

    for party_id in party_ids:
        _status, _message = federated_api(job_id=job_id,
                                          method='POST',
                                          url='/model/load/do',
                                          party_id=party_id,
                                          json_body=model_request)
    return get_json_result(job_id=job_id)
Esempio n. 6
0
def new_federated_job(request, overall_timeout=DEFAULT_GRPC_OVERALL_TIMEOUT):
    """Forward the incoming request to the ``.../do`` endpoint of a party.

    The target URL is ``/<path>/do`` where ``<path>`` is the request's base
    URL with the host URL stripped. The target party is
    ``local.party_id`` from the JSON body, falling back to ``PARTY_ID``.

    Args:
        request: the incoming request; ``json``, ``base_url`` and
            ``host_url`` are read from it.
        overall_timeout: passed through to ``federated_api``.

    Returns:
        When ``federated_api`` reports status 0, a result built from the
        JSON-decoded reply (``status``, ``msg``, optional ``data``,
        ``jobId``); otherwise a result carrying the reported status and
        message together with the locally generated job id.
    """
    body = request.json
    local_job_id = generate_job_id()
    relative_path = request.base_url.replace(request.host_url, '')
    target_party = body.get('local', {}).get('party_id', PARTY_ID)

    status, message = federated_api(job_id=local_job_id,
                                    method='POST',
                                    url='/{}/do'.format(relative_path),
                                    party_id=target_party,
                                    json_body=body,
                                    overall_timeout=overall_timeout)

    # Guard clause: a non-zero status is reported as-is.
    if status != 0:
        return get_json_result(status=status, msg=message, job_id=local_job_id)

    reply = json.loads(message)
    return get_json_result(status=reply['status'],
                           msg=reply['msg'],
                           data=reply.get('data'),
                           job_id=reply['jobId'])
Esempio n. 7
0
def load_model():
    """Ask every non-arbiter party in the request's ``role`` map to load a model.

    When the body sets ``gen_table_info``, model info is generated first via
    ``publish_model.generate_model_info``. For each party of each role other
    than ``'arbiter'``, the body's ``local`` entry is overwritten with that
    role and party id before the body is POSTed to ``/model/load/do``. The
    per-party status and message are not inspected.

    Returns:
        ``get_json_result(job_id=...)`` with the newly generated job id.
    """
    model_request = request.json
    job_id = generate_job_id()

    if model_request.get('gen_table_info', False):
        publish_model.generate_model_info(model_request)

    for role, members in model_request.get("role").items():
        if role != 'arbiter':
            for member_id in members:
                # Each party receives the body with itself marked as local.
                model_request['local'] = dict(role=role, party_id=member_id)
                _status, _message = federated_api(job_id=job_id,
                                                  method='POST',
                                                  url='/model/load/do',
                                                  party_id=member_id,
                                                  json_body=model_request)
    return get_json_result(job_id=job_id)
Esempio n. 8
0
def load_model():
    """Send a load-model request over gRPC to every party in the config file.

    The configuration is loaded from the ``config_path`` given in the JSON
    request body. For each entry of its ``party_ids``, ``my_party_id`` is set
    to that party, the config is wrapped into a packet addressed to
    ``/model/load/do`` and submitted with ``stub.unaryCall``.

    Returns:
        ``get_json_result()`` when every call went through, or
        ``get_json_result(-101, ...)`` as soon as one call raises
        ``grpc.RpcError`` (remaining parties are not contacted).
    """
    conf = file_utils.load_json_conf(request.json.get("config_path"))
    job_id = generate_job_id()
    # NOTE(review): the channel is never closed in this function - confirm
    # whether get_proxy_data_channel() hands out a shared channel.
    _channel, grpc_stub = get_proxy_data_channel()

    # Same verb and endpoint for every party.
    http_method = 'POST'
    endpoint = '/model/load/do'

    for party_id in conf.get("party_ids"):
        conf['my_party_id'] = party_id
        packet = wrap_grpc_packet(conf, http_method, endpoint, party_id, job_id)
        start_line = 'Starting load model job_id:{} party_id:{} method:{} url:{}'.format(
            job_id, party_id, http_method, endpoint)
        logger.info(start_line)
        try:
            reply = grpc_stub.unaryCall(packet)
            logger.info("Grpc unary response: {}".format(reply))
        except grpc.RpcError:
            failure = 'job_id:{} party_id:{} method:{} url:{} Failed to start load model'.format(
                job_id, party_id, http_method, endpoint)
            logger.exception(failure)
            return get_json_result(-101, 'UnaryCall submit to remote manager failed')
    return get_json_result()
Esempio n. 9
0
def download_upload(data_func):
    """Prepare a runtime config and launch the download/upload job process.

    The JSON request body is completed with ``work_mode`` (defaulting to
    ``WORK_MODE``), ``table_name`` and ``namespace`` (resolved through
    ``dtable_utils.get_table_info``), dumped to a new runtime conf file in
    the job directory, and handed to the module script configured in
    ``JOB_MODULE_CONF`` via ``run_subprocess``. For uploads, a relative
    ``file`` path is resolved against the project base directory.

    Args:
        data_func: ``"upload"`` or ``"download"``; used as the module key,
            the conf method and the job role. Only ``"download"`` passes
            ``-j <job_id>`` to the script.

    Returns:
        A result with the job id and ``pid`` / ``table_name`` /
        ``namespace`` on success, status 102 when no table name or namespace
        could be resolved, or status -104 when anything raised.
    """
    request_config = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, request_config))
    _job_dir = get_job_directory(_job_id)
    os.makedirs(_job_dir, exist_ok=True)
    module = data_func
    try:
        # Done inside the try so that an upload request without a "file" key
        # is reported through the normal failure response instead of
        # escaping the handler as an unhandled KeyError.
        if module == "upload" and not os.path.isabs(request_config.get("file", "")):
            request_config["file"] = os.path.join(file_utils.get_project_base_directory(), request_config["file"])

        request_config["work_mode"] = request_config.get('work_mode', WORK_MODE)
        table_name, namespace = dtable_utils.get_table_info(config=request_config, create=(module == 'upload'))
        if not table_name or not namespace:
            return get_json_result(status=102, msg='no table name and namespace')
        request_config['table_name'] = table_name
        request_config['namespace'] = namespace
        conf_file_path = new_runtime_conf(job_dir=_job_dir, method=data_func, module=module,
                                          role=request_config.get('local', {}).get("role"),
                                          party_id=request_config.get('local', {}).get("party_id", PARTY_ID))
        file_utils.dump_json_conf(request_config, conf_file_path)

        progs = ["python3",
                 os.path.join(file_utils.get_project_base_directory(), JOB_MODULE_CONF[module]["module_path"])]
        if module == "download":
            progs += ["-j", _job_id]
        progs += ["-c", conf_file_path]

        p = run_subprocess(job_dir=_job_dir, job_role=data_func, progs=progs)
        return get_json_result(job_id=_job_id, data={'pid': p.pid, 'table_name': request_config['table_name'], 'namespace': request_config['namespace']})
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=-104, msg="failed", job_id=_job_id)
Esempio n. 10
0
def submit_workflow_job():
    """Queue the posted workflow configuration under a newly generated job id.

    Errors raised while queueing are not handled here and propagate to the
    caller.

    Returns:
        ``get_json_result(job_id=...)`` with the generated job id.
    """
    workflow_config = request.json
    new_job_id = generate_job_id()
    log_line = 'generated job_id {}, body {}'.format(new_job_id, workflow_config)
    logger.info(log_line)
    push_into_job_queue(config=workflow_config, job_id=new_job_id)
    return get_json_result(job_id=new_job_id)