Example #1
def second_evaluation_core(spark_session, condition, operator_id):
    """
    Core function for binary classification evaluation
    :param spark_session:
    :param condition:
    :param operator_id:
    :return:
    """
    # Load the model
    # The current node (evaluation node) has one parent node
    operator = OperatorDao.get_operator_by_id(operator_id)
    # The parent node (prediction node) has two parents
    father_id = operator.father_operator_ids
    father_operator = OperatorDao.get_operator_by_id(father_id)
    # Grandparent nodes (model node and prediction-data source node)
    grand_father_ids = father_operator.father_operator_ids.split(',')
    print("**********祖节点(模型节点和读预测数据源节点):", grand_father_ids)

    # Read the data
    def get_predict_data(operator_config_):
        for grand_father_file_ in operator_config_:
            grand_father_id_ = list(grand_father_file_.keys())[0]
            grand_father_ = OperatorDao.get_operator_by_id(grand_father_id_)
            if grand_father_.operator_type_id == 5001 or grand_father_.operator_type_id < 3000:
                print("***************评估函数,预测数据:",
                      grand_father_.operator_type_id)
                pre_data_file_url = grand_father_.operator_output_url.split(
                    '*,')[grand_father_file_[grand_father_id_]]
                print("***************评估函数,预测数据url:", pre_data_file_url)
                return read_data(spark_session, pre_data_file_url)

    print("**********预测节点:", father_operator.operator_config)
    df = get_predict_data(
        json.loads(father_operator.operator_config)['fileUrl'])

    # Evaluate
    for grand_father_id in grand_father_ids:
        grand_father = OperatorDao.get_operator_by_id(grand_father_id)
        grand_father_operator_type = grand_father.operator_type_id
        # Model-loading node
        if grand_father_operator_type == 8000:
            grand_father_operator_type = json.loads(
                grand_father.operator_config)['parameter']['modelTypeId']
        if grand_father_operator_type == 6001:  # SVM binary classification node
            print("***************评估函数,训练模型", grand_father.operator_type_id)
            evaluation_df = svm_second_evaluation(
                spark_session, grand_father.operator_output_url, df,
                json.loads(father_operator.operator_config)['parameter'],
                condition)
            return evaluation_df
        elif grand_father_operator_type == 6003:  # LR binary classification node
            print("***************评估函数,训练模型", grand_father.operator_type_id)
            evaluation_df = lr_second_evaluation(
                spark_session, grand_father.operator_output_url, df,
                json.loads(father_operator.operator_config)['parameter'],
                condition)
            return evaluation_df
Example #2
def frequency_statistics(spark_session, operator_id, file_url, condition):
    """
    Frequency statistics
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data_pandas(file_url)
        # Frequency statistics function
        result_df = frequency_statistics_core(df, condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_file_url = save_data_pandas(result_df)
            run_info = '频次统计算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
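
frequency_statistics_core itself is not shown above; the following is only a minimal pandas sketch of what it might look like, assuming the condition carries a "columnNames" list (a hypothetical key) and that errors are reported as strings, as the caller expects.

import pandas as pd


def frequency_statistics_core_sketch(df, condition):
    """Minimal sketch: count value frequencies for each selected column."""
    frames = []
    for col in condition.get("columnNames", []):   # assumed key, may differ in the real config
        counts = df[col].value_counts().reset_index()
        counts.columns = ["value", "frequency"]
        counts.insert(0, "column", col)
        frames.append(counts)
    if not frames:
        return "no columns selected"               # errors are returned as strings, as in the caller
    return pd.concat(frames, ignore_index=True)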
Example #3
def second_evaluation(spark_session, operator_id, condition):
    """
    Binary classification evaluation
    :param spark_session:
    :param operator_id:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Evaluation function
        result_df = second_evaluation_core(spark_session, condition,
                                           operator_id)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_df.show()
            result_file_url = save_data(result_df)
            run_info = '评估算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
Example #4
def one_hot_encoder(spark_session, operator_id, file_url, condition):
    """
    Page route for one-hot encoding
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:{"userId":1,"projectId":32,"columnNames":["数量","数量"],"newColumnNames":["独热编码1","独热编码2"]}
    :return:
    """

    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # One-hot encoding function
        result_df = one_hot_encoder_core(df, condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_file_url = save_data(result_df)
            run_info = '独热编码算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
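
one_hot_encoder_core is not shown above; this is only a minimal sketch of what it might do, assuming the Spark 3.x ML API and the columnNames/newColumnNames layout documented in the docstring.

from pyspark.ml import Pipeline
from pyspark.ml.feature import OneHotEncoder, StringIndexer


def one_hot_encoder_core_sketch(df, condition):
    stages = []
    for col, new_col in zip(condition["columnNames"], condition["newColumnNames"]):
        indexed = new_col + "_index"
        # Categorical values must be indexed before they can be one-hot encoded.
        stages.append(StringIndexer(inputCol=col, outputCol=indexed))
        stages.append(OneHotEncoder(inputCol=indexed, outputCol=new_col))
    return Pipeline(stages=stages).fit(df).transform(df)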
Example #5
def correlation_coefficient(spark_session, operator_id, file_url, condition):
    """
    Correlation coefficient
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data_pandas(file_url)
        # Correlation coefficient function
        result_df = correlation_coefficient_core(df, condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_file_url = save_data_pandas(result_df, '', '', 1)
            run_info = '相关系数算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
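
correlation_coefficient_core is not shown above; the following pandas sketch illustrates one plausible implementation, assuming a hypothetical "columnNames" key in the condition and string return values for errors, as the caller expects.

def correlation_coefficient_core_sketch(df, condition):
    columns = condition.get("columnNames") or list(df.select_dtypes("number").columns)
    missing = [c for c in columns if c not in df.columns]
    if missing:
        return "columns not found: " + ",".join(missing)   # errors are reported as strings
    # Pearson correlation matrix as a DataFrame so it can be saved with save_data_pandas.
    return df[columns].corr(method="pearson").reset_index()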
Example #6
def chiSqSelector(spark_session, operator_id, file_url, condition):
    """
    Chi-square selection
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Chi-square selection function
        result_df = chiSqSelector_core(df, condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_file_url = save_data(result_df)
            run_info = '卡方选择算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
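
chiSqSelector_core is not shown above; this is only a sketch of how chi-square selection might be done with Spark ML, where the "columnNames", "labelColumn" and "topFeatures" condition keys are assumptions.

from pyspark.ml.feature import ChiSqSelector, VectorAssembler


def chi_sq_selector_core_sketch(df, condition):
    # Assemble the candidate columns into a single feature vector first.
    assembler = VectorAssembler(inputCols=condition["columnNames"],
                                outputCol="features")
    assembled = assembler.transform(df)
    selector = ChiSqSelector(numTopFeatures=int(condition.get("topFeatures", 1)),
                             featuresCol="features",
                             labelCol=condition["labelColumn"],
                             outputCol="selectedFeatures")
    return selector.fit(assembled).transform(assembled)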
Example #7
def quantile_discretization(spark_session, operator_id, file_url, condition):
    """
    Page route for quantile discretization
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Quantile discretization function
        result_df = quantile_discretization_core(df, condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_df.show()
            result_file_url = save_data(result_df)
            run_info = '分位数离散化算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
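
quantile_discretization_core is not shown above; a minimal sketch using Spark's QuantileDiscretizer, where the "columnName", "newColumnName" and "numBuckets" condition keys are assumptions.

from pyspark.ml.feature import QuantileDiscretizer


def quantile_discretization_core_sketch(df, condition):
    discretizer = QuantileDiscretizer(numBuckets=int(condition.get("numBuckets", 4)),
                                      inputCol=condition["columnName"],
                                      outputCol=condition["newColumnName"])
    # fit() computes the quantile split points; transform() assigns bucket indices.
    return discretizer.fit(df).transform(df)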
Example #8
def save_ml_model(operator_id, user_id, name):
    """
    Save a trained model
    :param operator_id:
    :param user_id:
    :param name:
    :return:
    """
    # Look up the operator
    operator = OperatorDao.get_operator_by_id(operator_id)
    if operator.operator_type_id > 7000 or operator.operator_type_id < 6001:
        return "所选择的节点并不是模型算子节点"
    if operator.status != "success":
        return "请执行该节点"
    if operator.operator_output_url is not None:
        operator_output_url = operator.operator_output_url.split('*,')
    else:
        return "没有运行结果"

    model_url = operator_output_url[0]
    operator_type_id = operator.operator_type_id
    model_id = operator.model_id

    # Look up the execution-flow model
    model = ModelDao.get_model_by_id(model_id)
    project_id = model.project_id

    ml_model = MLModel(user_id=user_id,
                       project_id=project_id,
                       model_id=model_id,
                       status='save',
                       name=name,
                       operator_type_id=operator_type_id,
                       model_url=model_url)
    return MLModelDao.create_ml_model(ml_model)
Example #9
def get_model_by_project_id(project_id):
    """
    Get the model (execution flow) for a project
    :param project_id:
    :return:
    """
    # Get the model
    model = ModelDao.get_model_by_project_id(project_id)
    if model is False:
        return False

    # Get the operators
    operators = OperatorDao.get_operator_by_model_id(model.id)
    if operators is False:
        return False

    # Get the operator types
    operator_types = OperatorTypeDao.get_all_operator_type()
    if operator_types is False:
        return False

    # TODO: query the data source table

    operator_types_dict = dict()
    for operator_type in operator_types:
        operator_types_dict[operator_type.id] = operator_type

    # Build the result
    config = dict()
    for operator in operators:
        if operator_types_dict[operator.operator_type_id].id == 5001:
            data_operator_type = json.loads(
                operator.operator_config)['fileUrl'][0][operator.id].split(
                    '/')[-1]
        else:
            data_operator_type = operator_types_dict[
                operator.operator_type_id].type_name
        config[operator.id] = {
            'type': data_operator_type,
            'name': operator_types_dict[operator.operator_type_id].id,
            'location': json.loads(operator.operator_style)['location'],
            'config': json.loads(operator.operator_config),
            'next': operator.child_operator_ids.split(','),
            "pre": operator.father_operator_ids.split(',')
        }
    model_config = json.loads(model.config)
    relationship = []
    for item in model_config['relationship'].split('*,'):
        relationship.append(list_str_to_list(item))
    config_order = json.loads(model_config['config_order'])
    return {
        'projectId': project_id,
        'config': config,
        'startNode': model.start_nodes.split(','),
        'relationship': relationship,
        'config_order': config_order
    }
Example #10
def get_predict_data(operator_config_):
    for grand_father_file_ in operator_config_:
        grand_father_id_ = list(grand_father_file_.keys())[0]
        grand_father_ = OperatorDao.get_operator_by_id(grand_father_id_)
        if grand_father_.operator_type_id == 5001 or grand_father_.operator_type_id < 3000:
            print("***************评估函数,预测数据:",
                  grand_father_.operator_type_id)
            pre_data_file_url = grand_father_.operator_output_url.split(
                '*,')[grand_father_file_[grand_father_id_]]
            print("***************评估函数,预测数据url:", pre_data_file_url)
            return read_data(spark_session, pre_data_file_url)
Example #11
def filter_multi_conditions(spark_session, operator_id, file_url, condition):
    """
    Filter by multiple conditions

    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition: {"userId":1,"projectId":32,"parameter":[{"colName":"利润", "operate":">","value":"100", "relation":"AND"},{"colName":"装运方式", "operate":"==", "value":"一级", "relation":""}]}
    :return:
    """

    try:

        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Filter function
        result_df = filter_core(spark_session, df, condition['parameter'])
        # Save the result
        result_file_url = save_data(result_df)

        run_info = '过滤算子执行成功'
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]
    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
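
filter_core is not shown above; the following sketch turns the documented "parameter" list into a single SQL-style filter expression (quoting and type handling are simplified, and spark_session is kept only for signature parity with the call above).

def filter_core_sketch(spark_session, df, parameters):
    parts = []
    for item in parameters:
        value = item["value"]
        # Quote non-numeric values so the expression stays valid Spark SQL.
        try:
            float(value)
        except ValueError:
            value = "'" + value + "'"
        parts.append("`{}` {} {}".format(item["colName"], item["operate"], value))
        if item.get("relation"):                 # AND / OR linking to the next clause
            parts.append(item["relation"])
    return df.filter(" ".join(parts))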
Example #12
def column_map(spark_session, operator_id, file_url, condition):
    """
    Column mapping
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:{"userId":1,"projectId":32,"parameter":[{"colName_1":"利润", "operate_1":"+","value_1":"100","operate":"+","colName_2":"数量", "operate_2":"*","value_2":"0.0001","newName":"newCol1"},{"colName_1":"利润", "operate_1":"+","value_1":"10","operate":"*","colName_2":"数量", "operate_2":"*","value_2":"0.1","newName":"newCol2"}]}
    :return:
    """

    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Column mapping function
        result_df = column_map_core(df, condition["parameter"])
        # Save the result
        result_file_url = save_data(result_df)
        # Update the computation status
        run_info = '列映射算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
Example #13
def fill_null_value(spark_session, operator_id, file_url, condition):
    """
    Fill null values
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition: {'userId':1,'projectId':32,'parameter':[{'operate':'均值填充','colName':''},{'operate':'均值填充','colName':'最大值填充'}]}
    :return:
    """

    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Null-value filling function
        result_df = fill_null_value_core(df, condition["parameter"])
        # Save the result
        result_file_url = save_data(result_df)
        # Update the computation status
        run_info = '空值填充算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
Example #14
def replace(spark_session, operator_id, file_url, condition):
    """
    Data replacement
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition: {"userId": 1, "projectId": 32, "columnNames": ["类别", "子类别", "客户名称"],"replaceCharacters":[{"source":"技术","target":"技术copy"},{"source":"电话","target":"电话copy"}]}
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Replacement function
        result_df = replace_core(df, condition)
        # Save the result
        result_file_url = save_data(result_df)
        # Update the computation status
        run_info = '数据替换算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
Example #15
def columns_merge(spark_session, operator_id, file_url, condition):
    """
    Merge multiple columns
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition: {"userId": 1, "projectId": 32, "columnNames": ["类别", "子类别", "产品名称"], "connector": "-", "newColumnName": "品类名称"}
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Merge function
        result_df = columns_merge_core(df, condition)
        # Save the result
        result_file_url = save_data(result_df)
        # Update the computation status
        run_info = '多列合并算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
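
columns_merge_core is not shown above; a minimal sketch using concat_ws, following the columnNames / connector / newColumnName layout documented in the docstring.

from pyspark.sql.functions import col, concat_ws


def columns_merge_core_sketch(df, condition):
    merged = concat_ws(condition["connector"],
                       *[col(c) for c in condition["columnNames"]])
    return df.withColumn(condition["newColumnName"], merged)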
Example #16
def column_split(spark_session, operator_id, file_url, condition):
    """
    Split by column
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:  {"userId": 1, "projectId": 32, "columnName": "订购日期", "delimiter": "/", "newColumnNames": ["year", "月"]}
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Split function
        result_df = column_split_core(spark_session, df, condition)
        # Save the result
        result_file_url = save_data(result_df)
        # Update the computation status
        run_info = '拆分算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
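
column_split_core is not shown above; a minimal sketch using pyspark.sql.functions.split, following the columnName / delimiter / newColumnNames layout documented in the docstring (spark_session is kept only for signature parity with the call above).

from pyspark.sql.functions import split


def column_split_core_sketch(spark_session, df, condition):
    parts = split(df[condition["columnName"]], condition["delimiter"])
    for i, new_col in enumerate(condition["newColumnNames"]):
        df = df.withColumn(new_col, parts.getItem(i))
    return df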
Example #17
def read_data_with_update_record(spark_session, operator_id, file_url):
    """
    Read-data operator: copy the data and update the operator record table

    :param spark_session:
    :param operator_id:
    :param file_url:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Save the result
        result_file_url = save_data(df)

        run_info = 'read_data算子执行成功'
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]
    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
Example #18
def sort(spark_session, operator_id, file_url, condition):
    """
    Sort

    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition: {"userId":1,"projectId":32,"columnName":"利润","sortType":"降序"}
    :return:
    """

    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Sort function
        result_df = sort_core(df, condition['columnName'],
                              condition['sortType'])
        # Save the result
        result_file_url = save_data(result_df)
        # TODO: check whether the result is a String (error message)
        run_info = '排序算子执行成功'
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_file_url, run_info)
        return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
Example #19
def ml_predict_core(spark_session, operator_id, df, model_url, condition):
    """
    Routing control: decide which model to load for prediction
    :param spark_session:
    :param operator_id:
    :param df:
    :param model_url:
    :param condition:
    :return: prediction result as a Spark DataFrame
    """

    # What component is the parent node?
    operator = OperatorDao.get_operator_by_id(operator_id)
    father_ids = operator.father_operator_ids.split(',')
    print("**********", operator.father_operator_ids)
    for father_id in father_ids:
        father = OperatorDao.get_operator_by_id(father_id)
        print("***************", father.operator_type_id)
        print("---------------", father.operator_type_id == 6001)
        operator_type_flag = father.operator_type_id

        # Model-loading node
        if operator_type_flag == 8000:
            operator_type_flag = json.loads(
                father.operator_config)['parameter']['modelTypeId']

        if operator_type_flag == 6001:  # SVM binary classification
            prediction_df = svm_second_predict(spark_session, model_url, df,
                                               condition)
        elif operator_type_flag == 6002:  # GBDT binary classification
            prediction_df = gbdt_second_predict(model_url, df, condition)
        elif operator_type_flag == 6003:  # LR binary classification
            prediction_df = lr_second_predict(model_url, df, condition)
        elif operator_type_flag == 6004:  # LR multiclass classification
            prediction_df = lr_multiple_predict(model_url, df, condition)
        elif operator_type_flag == 6005:  # MPC multiclass classification
            prediction_df = mpc_multiple_predict(model_url, df, condition)

    # The model to load is determined by the parent component's type
    return prediction_df
Example #20
def random_split(spark_session, operator_id, file_url, condition):
    """
    Randomly split the data by ratio
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Split function
        (result_df1, result_df2) = random_split_core(df, condition)
        # Save the results
        result_file_url1 = save_data(result_df1)
        result_file_url2 = save_data(result_df2)
        # Update the computation status
        run_info = '随机划分算子执行成功'
        OperatorDao.update_operator_by_id(
            operator_id, 'success', result_file_url1 + "*," + result_file_url2,
            run_info)
        return [result_file_url1, result_file_url2]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
        return []
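
random_split_core is not shown above; a minimal sketch using DataFrame.randomSplit, where the "ratio" condition key and the default 0.7/0.3 split are assumptions.

def random_split_core_sketch(df, condition):
    ratios = [float(r) for r in condition.get("ratio", [0.7, 0.3])]   # assumed key
    df1, df2 = df.randomSplit(ratios, seed=42)
    return df1, df2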
Example #21
def gbdt(spark_session, operator_id, file_url, condition):
    """
    # GBDT (Gradient Boosting Decision Tree), also called MART (Multiple Additive Regression Tree),
    # is an iterative decision-tree algorithm: the model consists of multiple decision trees, and the
    # final answer is the sum of all trees' conclusions.
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # gbdt_core function
        result_model_url = gbdt_core(df, condition)
        # Update the computation status
        run_info = 'GBDT二分类算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_model_url, run_info)
        return [result_model_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
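
gbdt_core is not shown above; the following is only a sketch of training a Spark GBTClassifier and persisting it, where the "featureColumns", "labelColumn" and "iterations" condition keys and the output path are assumptions (the real code returns a stored model URL).

from pyspark.ml.classification import GBTClassifier
from pyspark.ml.feature import VectorAssembler


def gbdt_core_sketch(df, condition):
    # Assemble the feature columns into a single vector column for training.
    assembler = VectorAssembler(inputCols=condition["featureColumns"],
                                outputCol="features")
    train_df = assembler.transform(df)
    gbt = GBTClassifier(labelCol=condition["labelColumn"],
                        featuresCol="features",
                        maxIter=int(condition.get("iterations", 20)))
    model = gbt.fit(train_df)
    model_url = "/tmp/gbdt_model"          # placeholder path, not the project's real storage scheme
    model.write().overwrite().save(model_url)
    return model_url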
Example #22
def mpc(spark_session, operator_id, file_url, condition):
    """
    MPC (Multilayer Perceptron Classifier) multiclass classification
    Classifier trainer based on the Multilayer Perceptron.
    Each layer has sigmoid activation function, output layer has softmax.
    Number of inputs has to be equal to the size of feature vectors.
    Number of outputs has to be equal to the total number of labels.

    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # mpc_core function
        result_model_url = mpc_core(df, condition)
        # Update the computation status
        run_info = 'mpc多分类算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_model_url, run_info)
        return [result_model_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
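
mpc_core is not shown above; this sketch only illustrates the layer sizing described in the docstring (input layer = feature vector size, output layer = number of labels). The "featureColumns" / "labelColumn" condition keys, the hidden-layer size, and the output path are assumptions, and labels are assumed to be indices 0..k-1.

from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.feature import VectorAssembler


def mpc_core_sketch(df, condition):
    features = condition["featureColumns"]
    label_col = condition["labelColumn"]
    train_df = VectorAssembler(inputCols=features,
                               outputCol="features").transform(df)
    n_classes = train_df.select(label_col).distinct().count()
    # Input layer matches the feature vector size, output layer the number of labels.
    layers = [len(features), 8, n_classes]
    mpc = MultilayerPerceptronClassifier(layers=layers,
                                         labelCol=label_col,
                                         featuresCol="features",
                                         maxIter=100,
                                         seed=42)
    model = mpc.fit(train_df)
    model_url = "/tmp/mpc_model"           # placeholder path, not the project's real storage scheme
    model.write().overwrite().save(model_url)
    return model_url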
Example #23
def lr(spark_session, operator_id, file_url, condition):
    """
    Multiclass logistic regression
    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # lr_core function
        result_model_url = lr_core(df, condition)
        # Update the computation status
        run_info = '逻辑回归多分类算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          result_model_url, run_info)
        return [result_model_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
Example #24
def initial_execute_status(execute_user_id, start_nodes):
    """
    Initialize the execution status each time the model is run
    :param execute_user_id:
    :param start_nodes: []
    :return:
    """
    # Find the operators involved in this run
    operator_list = []
    operator_id_queue = []
    for x in start_nodes:
        operator_id_queue.append(x)
    while len(operator_id_queue) > 0:
        operator_id = operator_id_queue.pop(0)
        if operator_id is None or operator_id == "":
            continue
        operator = OperatorDao.get_operator_by_id(operator_id)
        operator_list.append(operator)
        for x in operator.child_operator_ids.split(','):
            operator_id_queue.append(x)

    # Initialize each operator's status to 'initial'
    for operator in operator_list:
        OperatorDao.update_operator_by_id(operator.id, "initial")

    # Append an execution record
    model_execute = ModelExecute(start_nodes=','.join(start_nodes),
                                 status='initial',
                                 execute_user_id=execute_user_id,
                                 create_time=time.strftime(
                                     "%Y-%m-%d %H:%M:%S", time.localtime()))
    model_execute = ModelExecuteDao.create_model_execute(model_execute)
    if model_execute is False:
        return False
    else:
        return model_execute.id
Example #25
def ml_predict(spark_session, operator_id, file_urls, condition):
    """
    Machine-learning model prediction function
    :param spark_session:
    :param operator_id:
    :param file_urls: ["modelUrl","predictDataUrl"]
    # Two inputs: one is the model, the other is the prediction data
    :param condition:
    :return:
    """
    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        for url in file_urls:
            print("------fileUrl:", url)
            if url[-4:] == ".csv":
                url1 = url
            else:
                url0 = url
        df = read_data(spark_session, url1)
        # Prediction function
        result_df = ml_predict_core(spark_session, operator_id, df, url0,
                                    condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_df.show()
            result_file_url = save_data(result_df)
            run_info = '预测算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
Example #26
def get_run_status_by_project_id(project_id, model_execute_id):
    """
    Get the status of one execution and of each operator in it

    :param project_id:
    :param model_execute_id: ID of the model's execution record
    :return:
    """

    # Get the model
    model = ModelDao.get_model_by_project_id(project_id)
    if model is False:
        return False

    # Get the operators
    operators = OperatorDao.get_operator_by_model_id(model.id)
    if operators is False:
        return False

    # Build a dict
    id_operator_dict = {}
    for operator in operators:
        id_operator_dict[operator.id] = operator

    # Look up this execution record (status, start nodes)
    model_execute_ = ModelExecuteDao.get_model_execute_by_id(model_execute_id)
    operator_from_one_ids = model_execute_.start_nodes.split(',')

    # Collect the status of every node in this execution
    result = dict()
    while operator_from_one_ids:
        item = operator_from_one_ids.pop(0)
        if not (item is None or item == ''):
            result[id_operator_dict[item].id] = {
                "status": id_operator_dict[item].status,
                "log": id_operator_dict[item].run_info
            }
            operator_from_one_ids.extend(
                id_operator_dict[item].child_operator_ids.split(','))

    return {
        "modelExecuteStatus": model_execute_.status,
        "operatorStatus": result
    }
Example #27
def get_operate_result_data():
    """
    View an operator's result data
    :return:
    """
    operator_id = request.form.get('operatorId')
    start = int(request.form.get('start'))
    end = int(request.form.get('end'))
    print(operator_id, start, end)
    operator = OperatorDao.get_operator_by_id(operator_id)
    if operator.status != "success":
        return "请执行该节点"
    if operator.operator_output_url is not None:
        operator_output_url = operator.operator_output_url.split('*,')
    else:
        return "没有运行结果"
    result_arr = []
    try:
        for i in range(len(operator_output_url)):
            data = pd.read_csv(operator_output_url[i], encoding='utf-8')
            if len(data) < end:
                end = len(data)
            if start > end:
                result_arr.append({
                    'length': len(data),
                    'data': "请输入合法参数",
                    'position': i
                })
            else:
                data2 = data[int(start):int(end)].to_json(orient='records',
                                                          force_ascii=False)
                result_arr.append({
                    'length': len(data),
                    'data': json.loads(data2),
                    'position': i
                })
        return jsonify(result_arr)
    except Exception:
        traceback.print_exc()
        return "Error,please contact the administrator "
Example #28
def vector_indexer(spark_session, operator_id, file_url, condition):
    """
    Vector index transformation
    # Vector indexing is meant to transform a Vector, e.g. [aa, bb, cc], rather than the single values
    # in this example; since no suitable data is available, single values are temporarily wrapped into
    # vectors to implement the feature: aa -> [aa]

    :param spark_session:
    :param operator_id:
    :param file_url:
    :param condition:
    :return:
    """

    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Read the data
        df = read_data(spark_session, file_url)
        # Vector index transformation function
        result_df = vector_indexer_core(df, condition)
        if isinstance(result_df, str):
            OperatorDao.update_operator_by_id(operator_id, 'error', '',
                                              result_df)
        else:
            # Save the result
            result_df.show()
            result_file_url = save_data(result_df)
            run_info = '向量索引转换化算子执行成功'
            # Update the computation status
            OperatorDao.update_operator_by_id(operator_id, 'success',
                                              result_file_url, run_info)
            return [result_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []
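
vector_indexer_core is not shown above; a minimal sketch of the workaround the docstring describes: wrap the single value into a one-element vector, then index it with VectorIndexer. The "columnName" / "newColumnName" condition keys and maxCategories value are assumptions.

from pyspark.ml.feature import VectorAssembler, VectorIndexer


def vector_indexer_core_sketch(df, condition):
    # aa -> [aa]: turn the single value into a one-element vector first.
    assembled = VectorAssembler(inputCols=[condition["columnName"]],
                                outputCol="vector").transform(df)
    indexer = VectorIndexer(inputCol="vector",
                            outputCol=condition["newColumnName"],
                            maxCategories=20)
    return indexer.fit(assembled).transform(assembled)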
Example #29
def get_status_model_execute_end(project_id, start_operator_ids):
    """
    Get the overall status after the run has finished

    :param project_id:
    :param start_operator_ids:
    :return:
    """
    # Get the model
    model = ModelDao.get_model_by_project_id(project_id)
    if model is False:
        return False

    # Get the operators
    operators = OperatorDao.get_operator_by_model_id(model.id)
    if operators is False:
        return False

    # Build a dict
    id_operator_dict = {}
    for operator in operators:
        id_operator_dict[operator.id] = operator

    operator_from_one_ids = []
    operator_from_one_ids.extend(start_operator_ids)

    # Collect the status of this run's start nodes and all downstream nodes
    status_set = set()
    while operator_from_one_ids:
        item = operator_from_one_ids.pop(0)
        if not (item is None or item == ''):
            status_set.add(id_operator_dict[item].status)
            operator_from_one_ids.extend(
                id_operator_dict[item].child_operator_ids.split(','))

    if len(status_set) == 1 and "success" in status_set:
        return "success"
    else:
        return "error"
Example #30
def model_operator(operator_id, condition):
    """
    Load-model operator
    :param operator_id:
    :param condition:{"MLModelId": 2, "modelTypeId": 6001}
    :return:
    """

    try:
        # Update the computation status
        OperatorDao.update_operator_by_id(operator_id, 'running', '', '')
        # Load-model function
        model_file_url = model_operator_core(condition)
        # Update the computation status
        run_info = '模型算子执行成功'
        OperatorDao.update_operator_by_id(operator_id, 'success',
                                          model_file_url, run_info)
        return [model_file_url]

    except Exception as e:
        run_info = str(e)
        OperatorDao.update_operator_by_id(operator_id, 'error', '', run_info)
        traceback.print_exc()
    return []