Ejemplo n.º 1
0
def init_source(experiment_id):
    """
    Build (or fetch from cache) the initial data the algorithm needs.

    The result is cached through ``redis_util`` under the key
    ``'experiment_' + experiment_id``; when a cached value exists it is
    returned without reading the source file again.

    :param experiment_id: experiment id (concatenated onto a string key)
    :return: ``rest.responseto`` wrapping the output of ``lr_service.get_init``
    """
    cache_key = 'experiment_' + experiment_id
    out = redis_util.load(cache_key)
    if out is not None:
        return rest.responseto(out)

    experiment = experiment_service.get_experiment(experiment_id)
    source = source_service.get_source(experiment.source_id)
    # Variables already selected for this source, plus the target variable.
    selected_variable = variable_service.get_selected_variables(source.id)
    selected_list = [v.variable_name for v in selected_variable['used']]
    target = selected_variable['target'].variable_name

    # Parse the source file into a DataFrame according to its declared type.
    data_frame = None
    if source.file_type == DBConstant.SOURCE_FILE_TYPE_CSV:
        data_frame = pandas.read_csv(source.file_path)
    elif source.file_type == DBConstant.SOURCE_FILE_TYPE_EXCEL:
        # The original compared `source_service` (the service object) with
        # the constant, so this branch could never match.
        data_frame = pandas.read_excel(source.file_path)
    # NOTE(review): any other file type leaves data_frame as None and it is
    # passed to get_init unchanged, as before - confirm how get_init copes.

    # Initial binning of the selected variables.
    out = lr_service.get_init(data_frame, valid=selected_list, target=target, fineMinLeafRate=0)
    redis_util.dump(cache_key, out)

    return rest.responseto(out)
Ejemplo n.º 2
0
def if_applyed():
    """
    Report whether WOE has already been applied to the train file.

    Meant to be checked before variables are picked for the KS computation.
    The answer is the truthiness of the module-level ``safely_apply`` flag.
    """
    applied = True if safely_apply else False
    return rest.responseto(success=applied)
Ejemplo n.º 3
0
def create_model_name():
    """
    Create the "master" branch for a new model name.

    Reads ``model_name`` from the query string and responds with:
      * the model/branch pair when the branch was created,
      * "name exist" when a master branch for that name is already stored,
      * "create fail" when ``create_branch`` returns a falsy value.
    """
    model_name = request.args.get("model_name")
    master = "master"

    existing = tool_model_service.load_model(model_name=model_name,
                                             model_branch=master)
    # Treat both None and an empty result as "no such model".
    # NOTE(review): other callers index/iterate load_model's result, so an
    # empty list looks like a possible "not found" value - confirm.
    if existing:
        return rest.responseto("name exist", success=False)

    # The original called create_branch() with no arguments, so the name
    # reported back to the client was never handed to the service. The
    # keyword names match the create_branch call made by upload().
    if tool_model_service.create_branch(model_name=model_name,
                                        model_branch=master):
        return rest.responseto({
            "model_name": model_name,
            "model_branch": master
        })
    return rest.responseto("create fail", success=False)
Ejemplo n.º 4
0
def parse():
    """
    Load a model's workbook and return a summary of its training rows.

    Form fields: ``modelName``, ``branch``, ``filePath`` (relative to
    ``ROOT_PATH``). The workbook is read once per model/branch pair and kept
    in ``global_value``; rows with ``dev_ind == 1`` are the training set and
    rows with ``dev_ind == 0`` the test set.

    :return: summary data plus ``branches``, ``selected_list`` and ``target``
             of the stored model; a failure response when the file or the
             model record does not exist.
    """
    model_name = request.form.get("modelName")
    branch = request.form.get("branch")
    # Path given by the user, relative to ROOT_PATH.
    # NOTE(review): it is joined as-is, so "../" segments are not rejected.
    file_path = request.form.get("filePath")
    root_path = app.config["ROOT_PATH"]

    path = root_path + "/" + file_path
    # Model name plus branch name is the unique cache key.
    key = model_name + "_" + branch

    # Nothing below makes sense unless the file really exists.
    if not os.path.exists(path):
        return rest.responseto(message="file not exist", success=False)

    if global_value.has_key(key) is False:
        # Not cached yet: read the workbook and split it by dev_ind.
        df_all = pd.read_excel(path)
        df_train = df_all[df_all['dev_ind'] == 1]
        df_test = df_all[df_all['dev_ind'] == 0]
        global_value.set_value(**{
            key: {
                "df_all": df_all,
                "df_train": df_train,
                "df_test": df_test
            }
        })
    else:
        df_train = global_value.get_value(key)['df_train']

    # Summarise the training frame and convert it for front-end display.
    data_map = cmm.df_for_html(ba.get_df_summary(df_train))

    result = tool_model_service.load_model(model_name=model_name,
                                           model_branch=branch)
    if not result:
        # Previously this fell through to result[0] and raised.
        return rest.responseto(message="model not exist", success=False)

    current = result[0]
    data_map["branches"] = [row.model_branch for row in result]
    data_map["selected_list"] = current.selected_list
    data_map["target"] = current.model_target

    return rest.responseto(data=data_map)
Ejemplo n.º 5
0
def get_projects(user_id):
    """
    Return all projects of the given user.

    :param user_id: id passed to ``project_service.get_projects``
    :return: response encoded with ``tool_model.AlchemyEncoder``
    """
    return rest.responseto(project_service.get_projects(user_id),
                           cls=tool_model.AlchemyEncoder)
Ejemplo n.º 6
0
def divide_manually():
    """
    Re-bin one variable using boundaries typed in by the user.

    Form fields: ``boundary``, ``variable_name``, ``branch``, ``model_name``
    and ``type``. When ``type`` is the string "true" the boundary is parsed
    as comma-separated groups of "|"-separated values, each converted with
    ``cmm.transfer``; otherwise it is parsed as comma-separated floats.

    :return: response wrapping ``generate_response`` for the adjusted bins
    """
    boundary = request.form.get("boundary")
    variable_name = request.form.get("variable_name")
    branch = request.form.get("branch")
    model_name = request.form.get("model_name")
    # Named is_categorical instead of `type` to stop shadowing the builtin.
    # NOTE(review): the flag is passed where divide()/merge() pass their
    # "Categorical" flag, hence the name - confirm.
    is_categorical = request.form.get("type") == "true"

    df_map = global_value.get_value(model_name + "_" + branch)
    df_train = df_map['df_train']

    if is_categorical:
        boundary_list = [[cmm.transfer(item) for item in group.split("|")]
                         for group in boundary.split(",")]
        columns = ['bin_num', variable_name, 'bads', 'goods', 'total', 'total_perc', 'bad_rate', 'woe',
                   'type']
    else:
        boundary_list = [float(edge) for edge in boundary.split(",")]
        columns = ['bin_num', 'min', 'max', 'min_boundary', 'max_boundary', 'bads', 'goods', 'total', 'total_perc',
                   'bad_rate', 'woe',
                   'type']

    model = tool_model_service.load_model(model_name=model_name, model_branch=branch)[0]
    # Every other caller reads load_model rows by attribute; only this one
    # used item access. Prefer the attribute, keep item access as fallback.
    target = getattr(model, "model_target", None)
    if target is None:
        target = model["model_target"]

    result = bf.adjust_bin(df_train, is_categorical, variable_name, boundary_list,
                           target=target, expected_column={variable_name})

    iv = result['IV'].sum()
    df = pd.DataFrame(result, columns=columns)
    data = generate_response(variable_name, df, iv)
    return rest.responseto(data=data)
Ejemplo n.º 7
0
def user_register():
    """
    User registration endpoint.

    Form fields:
      * ``email``: user e-mail
      * ``password``: user password
      * ``nick_name``: user nickname

    :return: response carrying True when ``user_service.add_user`` returned
             something other than None, False otherwise
    :raises: ``Error.USER_LACK_NECESSARY_INFO`` when email or password is absent
    """
    form = request.form
    email = form.get("email")
    password = form.get("password")
    nick_name = form.get("nick_name")

    if email is None or password is None:
        raise Error.USER_LACK_NECESSARY_INFO

    user = tool_model.User()
    user.user_email = email
    # The password is stored as the output of cmm.to_md5.
    # NOTE(review): MD5 is not considered a safe password hash.
    user.user_password = cmm.to_md5(password)
    user.user_nick = nick_name

    created = user_service.add_user(user) is not None
    return rest.responseto(created)
Ejemplo n.º 8
0
def get_res(key):
    """
    POST a search to ``es_host + key + "/_search?pretty"``.

    The request body is the JSON document ``{"size": 5000}``.

    :param key: path segment appended to ``es_host``
    :return: response wrapping the JSON-decoded reply body
    """
    payload = json.dumps({"size": 5000})
    reply = requests.post(es_host + key + "/_search?pretty", data=payload)
    return rest.responseto(data=json.loads(reply.text))
Ejemplo n.º 9
0
def init():
    """
    Initialise the binning record.

    :return: dict, shaped as
        {variable_name1
                -var_table
                    -var_params(province,goods,bads...)
                -is_selected
                -iv
         variable_name2...}
    """
    form = request.form
    name = form.get("modelName")
    branch = form.get("branch")

    # Frames of the uploaded file, looked up in memory by model/branch key.
    df_map = global_value.get_value(name + "_" + branch)

    model = tool_model_service.load_model(model_name=name, model_branch=branch)[0]
    # selected_list is stored as JSON; decoded it is a dict whose keys are
    # the selected variable names (values: position of the variable).
    selected_list = json.loads(model.selected_list).keys()

    train_frame = df_map['df_train']
    init_result = get_init(train_frame, target=model.model_target, valid=selected_list)

    # Derive each variable's intervals from init_result, then sort by IV.
    boundaries = get_boundary(init_result, 0)
    return rest.responseto(data=sort_iv(boundaries))
Ejemplo n.º 10
0
def rank():
    """
    Sort on demand (triggered by an external button click).

    Reads the JSON string in form field ``data`` and passes it through
    ``sort_iv``.

    :return: same shape as the result of ``init``
    """
    payload = json.loads(request.form.get("data"))
    return rest.responseto(data=sort_iv(payload))
Ejemplo n.º 11
0
def login():
    """
    User login.

    :return: response wrapping the result of ``user_service.auth_user`` for
             the submitted ``email`` and ``password`` form fields
    """
    form = request.form
    credentials = (form.get('email'), form.get('password'))
    return rest.responseto(user_service.auth_user(*credentials))
Ejemplo n.º 12
0
def handle_error(e):
    """
    Global exception interceptor.

    Logs the exception with its traceback and turns it into a failure
    response. ``HoneybeeException`` instances supply their own ``code``;
    everything else is reported as 500.

    :param e: the intercepted exception
    :return: failure response carrying the exception's message and code
    """
    code = e.code if isinstance(e, HoneybeeException) else 500
    log.error(e, exc_info=1)

    # `message` only exists on Python 2 exceptions (and on classes that
    # define it), and is empty unless exactly one argument was given, so
    # fall back to the string form rather than fail inside the handler.
    message = getattr(e, "message", None)
    if not message:
        message = str(e)
    return rest.responseto(None, message=message, code=code, success=False)
Ejemplo n.º 13
0
def save():
    """
    Replace the stored binning records of a model branch.

    Request values: ``model_name``, ``branch`` and ``data`` (JSON object
    keyed by variable name, each value holding ``iv``, ``var_table`` and
    ``is_selected``). Existing records of the branch are deleted first.

    :return: failure response unless ``save_binning_record`` returns True
    """
    params = request.values
    model_name = params.get("model_name")
    branch = params.get("branch")
    records_by_variable = json.loads(params.get("data"))

    tool_model_service.del_binning_record(model_name, branch)

    contents = [
        ModelContent(model_name=model_name,
                     model_branch=branch,
                     variable_name=variable,
                     variable_iv=record["iv"],
                     binning_record=json.dumps(record["var_table"], ensure_ascii=False),
                     is_selected=record["is_selected"])
        for variable, record in records_by_variable.items()
    ]
    if tool_model_service.save_binning_record(contents) is not True:
        return rest.responseto(success=False)
    return rest.responseto()
Ejemplo n.º 14
0
def init_model_name():
    """
    Entries for the training-file variable initialisation page.

    Loads the models with ``is_deleted=0`` and keeps each distinct model
    name once so the user can pick one.

    :return: [model_name1, model_name2, model_name3]
    """
    models = tool_model_service.load_model(is_deleted=0)
    # Only the model name is needed; the set drops duplicates.
    unique_names = {model.model_name for model in models}
    return rest.responseto(list(unique_names))
Ejemplo n.º 15
0
def get_branch_name():
    """
    List the branch names stored for a model.

    Query parameter: ``modelName``.

    :return: list with the ``model_branch`` of every row ``load_model``
             returns for that name
    """
    model_name = request.args.get("modelName")
    models = tool_model_service.load_model(model_name=model_name)
    return rest.responseto([model.model_branch for model in models])
Ejemplo n.º 16
0
def upload():
    """
    Store uploaded source files and register a "master" branch for each.

    Every file in ``file[]`` is saved under ``ROOT_PATH``; the part of the
    file name before the first "." becomes the model name. The loop stops at
    the first file whose target path already exists and answers
    "file exist" (files saved before it stay saved).
    """
    # With cross-origin requests the front end first probes with OPTIONS and
    # only then sends the POST, so anything but POST is a no-op.
    if request.method == 'POST':
        from unicodedata import normalize

        # Directory the training files are uploaded into.
        storage = app.config['ROOT_PATH']

        for uploaded in request.files.getlist("file[]"):
            # The file name comes from the client. Keep only its last path
            # component - after normalisation, because NFKD can turn
            # look-alike characters into real separators - so the file
            # cannot be written outside the storage directory.
            normalized = normalize('NFKD', uploaded.filename)
            safe_name = os.path.basename(normalized.replace("\\", "/"))
            filename = safe_name.encode('utf-8', 'ignore')
            file_path = storage + "/" + filename
            if os.path.exists(file_path):
                return rest.responseto(data="file exist", success=False)

            uploaded.save(file_path)
            # One timestamp so create_date and modify_date agree exactly.
            now = datetime.now()
            tool_model_service.create_branch(model_name=filename.split(".")[0], model_branch="master",
                                             create_date=now, modify_date=now,
                                             file_path=file_path, model_target="")
    return rest.responseto(data="success")
Ejemplo n.º 17
0
def load_all():
    """
    Return every stored binning record of a model branch.

    Request values: ``model_name`` and ``branch``.

    :return: dict keyed by variable name; each value holds ``iv``, the
             JSON-decoded ``var_table`` and a boolean ``is_selected``.
             Empty when the service returns None.
    """
    params = request.values
    model_name = params.get("model_name")
    branch = params.get("branch")

    rows = tool_model_service.load_binning_record(model_name, branch)
    if rows is None:
        return rest.responseto(data={})

    data = {}
    for row in rows:
        entry = {"iv": row["variable_iv"],
                 "var_table": json.loads(row["binning_record"]),
                 "is_selected": row["is_selected"] == const.SELECTED}
        data[row["variable_name"]] = entry
    return rest.responseto(data=data)
Ejemplo n.º 18
0
def new_branch():
    """
    Create a branch as a copy of an existing one.

    Form fields: ``model_name``, ``branch`` (the new branch) and
    ``original_branch`` (the branch to copy). The binning records of the
    original branch are re-created under the new branch name once
    ``copy_branch`` succeeds.

    :return: response with ``data=True`` when the branch was copied,
             ``data=False`` otherwise
    """
    model_name = request.form.get("model_name")
    branch = request.form.get("branch")
    original_branch = request.form.get("original_branch")

    records = tool_model_service.load_binning_record(model_name,
                                                     original_branch)
    # load_all() treats None as a possible result of this call; iterating
    # it here used to raise, so treat it as "no records to copy".
    if records is None:
        records = []

    contents = [
        ModelContent(model_name=model_name,
                     model_branch=branch,
                     variable_name=record["variable_name"],
                     variable_iv=record["variable_iv"],
                     binning_record=record["binning_record"],
                     is_selected=record["is_selected"])
        for record in records
    ]

    if tool_model_service.copy_branch(model_name, branch, original_branch):
        tool_model_service.save_binning_record(contents)
        return rest.responseto(data=True)
    return rest.responseto(data=False)
Ejemplo n.º 19
0
def update_source(source_id):
    """
    Rename a source and/or mark it as deleted.

    Form fields: ``setName`` (new alias) and ``ifDelete`` (deletion flag).

    :param source_id: id of the source to update
    :return: empty success response when ``update_source`` reports more than
             zero updated rows
    :raises: ``Error.SOURCE_UPDATE_FAIL`` otherwise
    """
    set_name = request.form.get("setName")
    if_delete = request.form.get("ifDelete")

    # Form values are strings, so bool("false") and bool("0") are True and
    # the original deleted the source for them. Explicit "off" spellings are
    # now read as False; any other non-empty value still means "delete".
    falsy_tokens = ("", "0", "false", "no", "off", "none", "null")
    delete_requested = (if_delete is not None
                        and if_delete.strip().lower() not in falsy_tokens)

    source = tool_model.Source()
    source.id = source_id
    source.set_name = set_name
    source.is_deleted = 1 if delete_requested else None

    num = source_service.update_source(source)
    if num > 0:
        return rest.responseto("")
    raise Error.SOURCE_UPDATE_FAIL
Ejemplo n.º 20
0
def load_applyed():
    """
    Read the file(s) produced after applying WOE.

    For each file in ``file[]`` the rows with ``dev_ind == 1`` are stored as
    ``df_train_woe`` and the rows with ``dev_ind == 0`` as ``df_test_woe`` in
    the cached entry of the model/branch given by the form fields
    ``model_name`` and ``branch``.
    """
    # With cross-origin requests the front end first probes with OPTIONS and
    # only then sends the POST, so anything but POST is a no-op.
    if request.method != 'POST':
        return rest.responseto(data="success")

    form = request.form
    model_name = form.get("model_name")
    branch = form.get("branch")
    for uploaded in request.files.getlist("file[]"):
        frames = global_value.get_value(model_name + "_" + branch)
        sheet = pd.read_excel(uploaded, encoding="utf-8")
        frames["df_train_woe"] = sheet[sheet['dev_ind'] == 1]
        frames["df_test_woe"] = sheet[sheet['dev_ind'] == 0]
    return rest.responseto(data="success")
Ejemplo n.º 21
0
def use_or_not(source_id):
    """
    Record which variables were chosen after previewing the data set.

    Form fields: ``selected`` (comma-separated variable names, optional) and
    ``target``.

    :param source_id: id passed to ``variable_service.set_variable_selected``
    :return: empty response; failures are expected to surface as exceptions
             intercepted by an outer layer
    """
    form = request.form
    raw_selected = form.get('selected')
    target = form.get('target')

    chosen = raw_selected.split(",") if raw_selected is not None else None
    variable_service.set_variable_selected(source_id, chosen, target)

    return rest.responseto("")
Ejemplo n.º 22
0
def add_project():
    """
    Create a new project.

    Form fields:
      * ``ownerId``: stored as the project's owner id
      * ``projectName``: project name
      * ``projectTask``: kind of model the project handles
      * ``projectDesc``: project description

    :return: response wrapping the first element of what
             ``project_service.create_project`` returns
    """
    form = request.form
    project = tool_model.Project()
    field_map = (
        ("owner_id", "ownerId"),
        ("project_name", "projectName"),
        ("project_task", "projectTask"),
        ("project_desc", "projectDesc"),
    )
    for attribute, form_field in field_map:
        setattr(project, attribute, form.get(form_field))

    outcome = project_service.create_project(project)
    return rest.responseto(outcome[0])
Ejemplo n.º 23
0
def add_experiment():
    """
    Add an algorithm experiment.

    Form fields:
      * ``title``: experiment name
      * ``project_id``: project id
      * ``source_id``: source id
      * ``algorithm``: algorithm code

    :return: response wrapping the value returned by
             ``experiment_service.create_experiment`` (the primary key)
    """
    form = request.form
    experiment = tool_model.Experiment()
    # Form field names and attribute names coincide.
    for field in ('title', 'project_id', 'source_id', 'algorithm'):
        setattr(experiment, field, form.get(field))

    return rest.responseto(experiment_service.create_experiment(experiment))
Ejemplo n.º 24
0
def list_source(project_id):
    """
    List every source that belongs to a project.

    :param project_id: id of the owning project
    :return: response wrapping one dict per source with its display fields
    """
    sources = source_service.get_sources(project_id=project_id)

    rows = [
        {
            'setName': source.set_name,
            'fileName': source.file_name,
            'fileSize': source.file_size,
            'fileScope': source.file_scope,
            'addAt': source.create_date.isoformat(sep=" "),
            'origin': source.file_origin,
            'readable': source.file_readable
        }
        for source in sources
    ]
    return rest.responseto(rows)
Ejemplo n.º 25
0
def add_source():
    """
    Add a new data source.

    Form fields:
      * ``projectId``: id of the owning project
      * ``fileScope``: scope in which the file may be accessed
      * ``setName``: alias the user gives the data set
      * ``fileOrigin``: where the file comes from
    Files:
      * ``dataSet``: the uploaded file (only the first one is used)

    :return: response wrapping the first element of what
             ``source_service.add_source`` returns
    :raises: ``Error.SOURCE_NO_FILE_RECEIVED`` when no file was uploaded
    """
    form = request.form
    source = tool_model.Source()
    source.project_id = form.get('projectId')
    source.file_scope = form.get('fileScope')
    source.set_name = form.get('setName')  # alias chosen by the user
    source.file_origin = form.get('fileOrigin')  # origin of the file

    # Fetch the uploaded file; having none is an error.
    files = request.files.getlist("dataSet")
    if not files:
        raise Error.SOURCE_NO_FILE_RECEIVED

    f = files[0]
    import os
    from unicodedata import normalize
    # The file name comes from the client. Keep only its last path component
    # - after normalisation, because NFKD can turn look-alike characters
    # into real separators - so the file cannot land outside ROOT_PATH.
    normalized = normalize('NFKD', f.filename)
    file_name = os.path.basename(normalized.replace("\\", "/")).encode('utf-8', 'ignore')
    file_path = app.config['ROOT_PATH'] + "/" + file_name
    # The type is the text after the LAST dot ("a.b.csv" -> "csv"); the
    # original cut at the first dot and returned the whole name when there
    # was no dot at all.
    _, dot, extension = file_name.rpartition('.')
    file_type = extension if dot else ''
    # Save the file locally.
    f.save(file_path)
    size = simple_util.get_file_size(file_path)

    source.file_type = file_type
    source.file_name = file_name  # name of the uploaded file
    source.file_path = file_path
    source.file_size = size

    result = source_service.add_source(source)[0]
    return rest.responseto(result)
Ejemplo n.º 26
0
def commit_branch():
    """
    Persist the target, selected variables and file path of a branch.

    Form fields: ``model_name``, ``branch``, ``selected_list``, ``target``
    and ``file_path`` (relative to ``ROOT_PATH``). When the model/branch key
    is not cached in ``global_value`` yet, the workbook is read and split by
    ``dev_ind`` (1 = train, 0 = test) before the update.

    :return: response wrapping the result of ``update_branch``
    """
    form = request.form
    model_name = form.get("model_name")
    branch = form.get("branch")
    selected_list = form.get("selected_list")
    target = form.get("target")
    file_path = form.get("file_path")

    path = app.config["ROOT_PATH"] + "/" + file_path

    key = model_name + "_" + branch
    if global_value.has_key(key) is False:
        # Not cached yet: load the frames again.
        df_all = pd.read_excel(path)
        frames = {
            "df_all": df_all,
            "df_train": df_all[df_all['dev_ind'] == 1],
            "df_test": df_all[df_all['dev_ind'] == 0],
        }
        global_value.set_value(**{key: frames})

    updated = tool_model_service.update_branch(model_name, branch, target,
                                               selected_list=selected_list,
                                               file_path=file_path)
    return rest.responseto(data=updated)
Ejemplo n.º 27
0
def checkout():
    """
    Return the stored record of one model branch.

    Request values: ``model_name`` and ``branch``.

    :return: response wrapping the first row ``load_model`` returns
    """
    params = request.values
    model_name = params.get("model_name")
    branch = params.get("branch")
    matches = tool_model_service.load_model(model_name=model_name,
                                            model_branch=branch)
    return rest.responseto(data=matches[0])
Ejemplo n.º 28
0
def es_req(key):
    """
    GET ``<es_host><key>/_search?pretty`` and return the parsed reply.

    :param key: path segment appended to the configured ``es_host``
    :return: response wrapping the JSON-decoded reply body
    """
    reply = requests.get(app.config["es_host"] + key + "/_search?pretty")
    return rest.responseto(data=json.loads(reply.text))
Ejemplo n.º 29
0
def divide():
    """
    Split one bin of a variable.

    The rows that fall into the selected bin are filtered out of the
    training frame, ``get_init`` is run on them to obtain new boundaries,
    those boundaries are combined with the boundaries of the remaining bins,
    and ``bf.adjust_bin`` on the whole training frame yields the result.

    :return:
        {variable_name1
                -var_table
                    -var_params{province,goods,bads...}
                -iv
        }
    """
    form = request.form
    model_name = form.get("modelName")
    branch = form.get('branch')
    df_train = global_value.get_value(model_name + "_" + branch)['df_train']
    # Decode the JSON payload, keeping the key order.
    data_map = json.loads(form.get('data'), object_pairs_hook=OrderedDict)
    name = data_map["name"]
    target = form.get("target")
    # Work on the target column and the variable column only.
    subset = pd.DataFrame(df_train, columns={target, name})
    selected = data_map["selected"]

    if selected["type"] == 'Numerical':
        # Keep the rows inside [min_boundary, max_boundary).
        lower = selected["min_boundary"]
        upper = selected["max_boundary"]
        values = subset[name].astype(float)
        in_range = (values >= float(lower)) & (subset[name].astype(float) < float(upper))
        out = get_init(subset[in_range], target=target, invalid=[], fineMinLeafRate=0)
        bound_list = get_divide_min_bound(out)

        remaining = data_map["table"]
        # Drop the bin that is being split.
        del remaining[data_map["selectedIndex"]]
        bound_list.extend(float(row["min_boundary"]) for row in remaining)

        columns = ['bin_num', 'min', 'max', 'min_boundary', 'max_boundary', 'bads', 'goods', 'total', 'total_perc',
                   'bad_rate', 'woe',
                   'type']
    else:
        wanted = selected[name].split("|")
        subset[name] = subset[name].apply(lambda x: simple_util.float_nan_to_str_nan(x))
        subset = subset[subset[name].isin(wanted)]

        remaining = data_map["table"]
        # Drop the bin that is being split.
        del remaining[data_map["selectedIndex"]]

        out = get_init(subset, target=target, invalid=[], fineMinLeafRate=0)
        bound_list = get_divide_caterotical_bound(out, name)
        # Add the groups of the untouched bins to the split result.
        for row in remaining:
            bound_list.append([cmm.transfer(part) for part in row[name].split("|")])

        columns = ['bin_num', name, 'bads', 'goods', 'total', 'total_perc', 'bad_rate', 'woe',
                   'type']

    # Both branches finish the same way: re-bin the full training frame.
    result = bf.adjust_bin(df_train, selected["type"] == 'Categorical', name, bound_list,
                           target=target, expected_column={name})
    iv = result['IV'].sum()
    table = pd.DataFrame(result, columns=columns)
    return rest.responseto(data=generate_response(name, table, iv))
Ejemplo n.º 30
0
def merge():
    """
    Merge bins of one variable.

    Form fields: ``modelName``, ``branch``, ``varName`` (variable to merge),
    ``type`` (variable type), ``boundary`` (the selected bins), ``allBoundary``
    (all bins) and ``target`` (defaults to 'bad_4w'). Bins are separated by
    "&"; the values inside a categorical bin by "|".

    :return: response wrapping ``generate_response`` for the merged bins
    """
    form = request.form
    model_name = form.get("modelName")
    branch = form.get("branch")
    var_name = form.get('varName')
    var_type = form.get('type').encode('utf-8')
    boundary = form.get('boundary').encode('utf-8')
    all_boundary = form.get('allBoundary').encode('utf-8')
    target = form.get('target')
    if target is None:
        target = 'bad_4w'

    df_map = global_value.get_value(model_name + "_" + branch)

    is_categorical = var_type != 'Numerical'
    if not is_categorical:
        # SECURITY: eval() runs text taken straight from the request and
        # allows arbitrary code execution. Left as-is because the accepted
        # input grammar is not visible here - replace with a strict parser.
        picked = map(eval, boundary.split("&"))
        # Parse every boundary; the text 'nan' becomes np.nan.
        every = []
        for token in all_boundary.split("&"):
            every.append(np.nan if token == 'nan' else float(token))
        # The boundaries that stay are the ones that were not selected.
        selected_list = list(set(every).difference(set(picked)))

        columns = ['bin_num', 'min', 'max', 'min_boundary', 'max_boundary', 'bads', 'goods', 'total', 'total_perc',
                   'bad_rate', 'woe',
                   'type']
    else:
        # All selected bins collapse into a single group ...
        merged = [cmm.transfer(part)
                  for group in boundary.split("&")
                  for part in group.split("|")]
        selected_list = [merged]
        # ... followed by each bin of allBoundary as its own group.
        if all_boundary != '':
            for group in all_boundary.split("&"):
                selected_list.append([cmm.transfer(part) for part in group.split("|")])

        columns = ['bin_num', var_name, 'bads', 'goods', 'total', 'total_perc', 'bad_rate', 'woe',
                   'type']

    # Re-bin the training frame with the merged boundaries.
    result = bf.adjust_bin(df_map["df_train"], is_categorical, var_name, selected_list, target=target,
                           expected_column={var_name})
    iv = result['IV'].sum()

    table = pd.DataFrame(result, columns=columns)
    return rest.responseto(data=generate_response(var_name, table, iv))