Beispiel #1
0
def select(sql, params=None):
    """Run a query against the configured Oracle database and return all rows.

    Connection settings come from ``conns.getConfig()`` (keys ``username``,
    ``password``, ``url`` and ``sid``). A fresh connection is opened for each
    call and is always closed before the function exits.

    Args:
        sql: SQL statement to execute.
        params: Optional bind values forwarded as the second argument of
            ``cursor.execute``. When omitted the statement is executed
            exactly as given.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the statement.

    Raises:
        Any exception raised while connecting, executing or fetching
        propagates to the caller unchanged.
    """
    orclConfig = conns.getConfig()
    conn = cx_Oracle.connect(orclConfig['username'], orclConfig['password'],
                             orclConfig['url'] + '/' + orclConfig['sid'])
    try:
        # Creating the cursor is inside the outer try so that a failure here
        # still closes the connection.
        cursor = conn.cursor()
        try:
            if params is None:
                cursor.execute(sql)
            else:
                cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()  # close the cursor
    finally:
        conn.close()  # close the connection, even if cursor.close() failed
Beispiel #2
0
def create_median(field, data):
    """Classify the rows of one series and store the result ('median' run).

    The function converts ``dt_time``/``dt_val``, collects the rows reported
    by ``detectoutliers`` plus the zero, negative and missing rows, wraps each
    group with ``result(...)`` (last argument 0 = normal, 1 = missing,
    2 = zero, 3 = negative, 4 = extreme), merges everything, applies
    ``amendment`` and hands the frame to ``oracleUtil`` with the target name
    ``'error_out18'``. Start and end timestamps are printed.

    Args:
        field: Value written to the ``id`` column of every output row.
        data: DataFrame with ``dt_time`` and ``dt_val`` columns. It is
            modified in place: ``dt_time`` is converted with
            ``pd.to_datetime`` and ``dt_val`` is cast to float.

    Returns:
        None.
    """
    print(datetime.datetime.now())
    data['dt_time'] = pd.to_datetime(data['dt_time'])
    data[['dt_val']] = data[['dt_val']].astype(float)

    # Rows flagged by the outlier detector.
    outliers = detectoutliers(data)

    # 1. rows whose value is exactly zero
    data_zero = data[(data.dt_val == 0)]
    # 2. rows whose value is negative
    data_minus = data[(data.dt_val < 0)]

    # get_errors returns (reference frame used for comparison, missing rows).
    errors_result = get_errors(data)
    df_period = errors_result[0]
    # 3. rows missing from data
    data_miss = errors_result[1]

    # 4. extreme values: the detector output with zero/negative rows removed.
    data_max = outliers
    if not outliers.empty and not outliers[outliers.dt_val <= 0].empty:
        non_positive = pd.concat([data_zero, data_minus],
                                 axis=0,
                                 ignore_index=True)
        # keep=False drops every dt_time that occurs more than once, so this
        # is a symmetric difference on dt_time.
        # NOTE(review): zero/negative rows that the detector did NOT flag
        # occur only once and therefore stay in data_max - confirm intended.
        data_max = pd.concat([outliers, non_positive]).drop_duplicates(
            subset=['dt_time'], keep=False)

    # All abnormal rows together.
    error_all = pd.concat([data_zero, data_miss, data_minus, data_max],
                          axis=0,
                          ignore_index=True)

    # 5. normal rows: reference rows whose dt_time is not in error_all.
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    data_normal = pd.concat([df_period, error_all]).drop_duplicates(
        subset=['dt_time'], keep=False)

    # Wrap every group into the standard output columns.
    datas_normal = result(data_normal.dt_time.tolist(), data, df_period, 0)
    datas_zero = result(data_zero.dt_time.tolist(), data, df_period, 2)
    datas_miss = result(data_miss.dt_time.tolist(), data, df_period, 1)
    datas_minus = result(data_minus.dt_time.tolist(), data, df_period, 3)
    datas_max = result(data_max.dt_time.tolist(), data, df_period, 4)

    # Merge all groups into one frame.
    list_all = pd.concat(
        [datas_normal, datas_zero, datas_miss, datas_minus, datas_max],
        axis=0,
        ignore_index=True)
    list_all['id'] = field  # add the id column
    list_all = list_all.sort_values(by='dt_time', axis=0,
                                    ascending=True)  # order by time
    list_all.reset_index(drop=True, inplace=True)
    list_all['dt_time'] = list_all['dt_time'].apply(
        lambda x: datetime.datetime.strftime(x, '%Y/%m/%d'))

    list_all['dt_type'] = 'median'

    # Missing and extreme rows are the ones passed to amendment for fixing.
    dt_error = pd.concat([datas_miss, datas_max], axis=0, ignore_index=True)
    dt_error = dt_error.sort_values(by='dt_time', axis=0,
                                    ascending=True)  # order by time

    list_all = amendment(dt_error, list_all)  # correct the erroneous values

    # Everything is written as strings.
    list_all = list_all.astype('str')
    db = getConfig()
    oracleUtil(
        db['username'] + ':' + db['password'] + '@' + db['url'] + '/' +
        db['sid'], list_all, 'error_out18')
    print(datetime.datetime.now())
Beispiel #3
0
def create_pyculiarity(field, data):
    """Classify the rows of one series with ``detect_ts`` and store the result.

    Inputs with fewer than 14 rows are skipped. Otherwise the anomalies
    returned by ``detect_ts`` are split into zero, negative and extreme rows,
    the missing rows come from ``get_errors``, each group is wrapped with
    ``result(...)`` (last argument 0 = normal, 1 = missing, 2 = zero,
    3 = negative, 4 = extreme), the merged frame is passed through
    ``amendment`` and handed to ``oracleUtil`` with the target name
    ``'error_out9'``. Start and end timestamps are printed.

    Args:
        field: Value written to the ``id`` column of every output row.
        data: DataFrame with ``id``, ``dt_time`` and ``dt_val`` columns. It is
            modified in place: ``dt_time``/``dt_val`` are converted and the
            ``id`` column is dropped.

    Returns:
        None.
    """
    if len(data) < 14:  # skip series shorter than two weeks of rows
        return
    print(datetime.datetime.now())
    data['dt_time'] = pd.to_datetime(data['dt_time'])
    data[['dt_val']] = data[['dt_val']].astype(float)
    # Snapshot that still has the id column; used for everything but detect_ts.
    data_copy = data.copy()
    data.drop("id", axis=1, inplace=True)
    # Run the detector.
    results = detect_ts(data, max_anoms=0.3, direction='both', e_value=True)

    # get_errors returns (reference frame used for comparison, missing rows).
    errors_result = get_errors(data_copy)
    df_period = errors_result[0]
    # 3. rows missing from data
    data_miss = errors_result[1]

    # Defaults so the final concat works whichever branch runs.
    datas_zero = pd.DataFrame()  # zero-value anomalies
    datas_miss = pd.DataFrame()  # missing-value anomalies
    datas_minus = pd.DataFrame()  # negative-value anomalies
    datas_max = pd.DataFrame()  # extreme-value anomalies
    datas_normal = pd.DataFrame()  # rows without anomaly

    if not results['anoms'].empty:
        # Bring the detector output to the dt_time / dt_val column names.
        error_all = results['anoms'].copy()
        error_all.drop("expected_value", axis=1, inplace=True)
        error_all.rename(columns={
            'timestamp': 'dt_time',
            'anoms': 'dt_val'
        },
                         inplace=True)

        # 1. anomalies whose value is exactly zero
        data_zero = error_all[(error_all.dt_val == 0)]
        # 2. anomalies whose value is negative
        data_minus = error_all[(error_all.dt_val < 0)]

        # 4. extreme values: anomalies that are neither zero nor negative.
        # keep=False drops every dt_time that occurs more than once.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        non_positive = pd.concat([data_zero, data_minus],
                                 axis=0,
                                 ignore_index=True)
        data_max = pd.concat([error_all, non_positive]).drop_duplicates(
            subset=['dt_time'], keep=False)

        # All abnormal rows together.
        error_all = pd.concat([data_zero, data_miss, data_minus, data_max],
                              axis=0,
                              ignore_index=True)

        # 5. normal rows: reference rows whose dt_time is not in error_all.
        data_normal = pd.concat([df_period, error_all]).drop_duplicates(
            subset=['dt_time'], keep=False)

        # Wrap every group into the standard output columns.
        datas_normal = result(data_normal.dt_time.tolist(), data_copy,
                              df_period, 0)
        datas_zero = result(data_zero.dt_time.tolist(), data_copy,
                            df_period, 2)
        datas_miss = result(data_miss.dt_time.tolist(), data_copy,
                            df_period, 1)
        datas_minus = result(data_minus.dt_time.tolist(), data_copy,
                             df_period, 3)
        datas_max = result(data_max.dt_time.tolist(), data_copy, df_period,
                           4)
    else:
        # No anomalies reported: only missing rows count as abnormal.
        datas_miss = result(data_miss.dt_time.tolist(), data_copy, df_period,
                            1)
        # NOTE(review): this uses the wrapped frame datas_miss, whereas the
        # branch above uses the raw data_miss - confirm this is intended.
        data_normal = pd.concat([df_period, datas_miss]).drop_duplicates(
            subset=['dt_time'], keep=False)
        datas_normal = result(data_normal.dt_time.tolist(), data_copy,
                              df_period, 0)

    # Merge all groups into one frame.
    list_all = pd.concat(
        [datas_normal, datas_zero, datas_miss, datas_minus, datas_max],
        axis=0,
        ignore_index=True)
    list_all['id'] = field  # add the id column
    list_all = list_all.sort_values(by='dt_time', axis=0,
                                    ascending=True)  # order by time
    list_all.reset_index(drop=True, inplace=True)
    list_all['dt_time'] = list_all['dt_time'].apply(
        lambda x: datetime.datetime.strftime(x, '%Y/%m/%d'))

    list_all['dt_type'] = 'pyculiarity'

    # Missing and extreme rows are the ones passed to amendment for fixing.
    dt_error = pd.concat([datas_miss, datas_max], axis=0, ignore_index=True)
    list_all = amendment(dt_error, list_all)  # correct the erroneous values

    # Everything is written as strings.
    list_all = list_all.astype('str')

    db = getConfig()
    oracleUtil(
        db['username'] + ':' + db['password'] + '@' + db['url'] + '/' +
        db['sid'], list_all, 'error_out9')
    print(datetime.datetime.now())
Beispiel #4
0
def create_arima(field, data):
    """Classify the rows of one series from ``arima_run`` output and store it.

    The values returned by ``arima_run`` are compared with one or two
    reference values (``get_mode`` / ``get_modes``, two when
    ``find_inflexion`` reports a non-zero position) to collect candidate
    anomaly positions. Together with the zero, negative and missing dates
    these are wrapped with ``result(...)`` (last argument 0 = normal,
    1 = missing, 2 = zero, 3 = negative, 4 = extreme), merged, optionally
    passed through ``replace_data_lg`` and handed to ``oracleUtil`` with the
    target name ``'error_out'``. Start and end timestamps are printed.

    Args:
        field: Value written to the ``id`` column of every output row.
        data: DataFrame with ``dt_time`` and ``dt_val`` columns. It is
            modified in place: ``dt_val`` is cast to float and ``dt_time``
            ends up as ``'%Y/%m/%d'`` strings.

    Returns:
        None.
    """
    print(datetime.datetime.now())
    # Pre-process the input. pd.to_datetime is used because the unit-less
    # 'datetime64' dtype string is rejected by pandas 2.x.
    data['dt_time'] = pd.to_datetime(data['dt_time'])
    data[['dt_val']] = data[['dt_val']].astype(float)
    data['dt_time'] = data['dt_time'].apply(
        lambda x: datetime.datetime.strftime(x, '%Y/%m/%d'))

    # Locate the inflexion position (0 means none) from the non-zero values.
    inflexion = 0
    non_zero_vals = data[(data.dt_val != 0)].dt_val
    if not non_zero_vals.empty:
        inflexion = find_inflexion(non_zero_vals)

    df = arima_run(data)
    obj = np.array(df).tolist()
    if inflexion != 0:
        # One reference value per segment, before and after the inflexion.
        model = get_modes(data[0:inflexion])
        model = model + get_modes(data[inflexion:])
        inflexion = inflexion - 1
    else:
        model = get_mode(obj)

    it = pd.DataFrame({'key': obj})
    it_copy = it.copy()

    # With an inflexion, drop the rows whose value equals a reference value.
    # NOTE(review): the original comment said the multi-model case is NOT
    # filtered, which contradicts this condition - confirm the intent.
    if inflexion != 0:
        it = it[~it['key'].isin(model)]

    # Positions of candidate anomalies.
    data_index = []
    # 1. dates whose value is exactly zero
    data_zero = data[(data.dt_val == 0)].dt_time.tolist()
    # 2. dates whose value is negative
    data_minus = data[(data.dt_val < 0)].dt_time.tolist()

    # 3. missing dates: resample to daily rows, then keep the dates that
    # occur only once across the resampled and the original rows.
    data2 = data.copy()
    data2['dt_time'] = pd.to_datetime(data2['dt_time'])
    df_period = data2.resample('D', on='dt_time').sum()
    df_period = df_period.reset_index()
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    miss_result = pd.concat([df_period, data2]).drop_duplicates(
        subset=['dt_time'], keep=False)
    data_miss = miss_result.dt_time.tolist()

    # model == obj is treated as "no anomalies". The emptiness guard avoids
    # an IndexError on the tail lookup when the filter removed every row.
    if not (operator.eq(model, obj)) and not it.empty:
        end = it.tail(1)["key"].index.values[0]  # last row is not examined
        for row in it.itertuples(index=True, name='Pandas'):
            pos, value = row[0], row[1]
            if not (value < 0 and pos > 0 and pos < end
                    and data.loc[pos + 1].dt_val != 0):
                continue

            # Reference value for this position; the second one is used at
            # the inflexion position.
            module = round(model[0], 3)
            if inflexion != 0 and pos == inflexion:
                module = round(model[1], 3)

            # Previous value is negative as well: flag the previous position.
            if pos > 1 and round(it_copy.loc[pos - 1]["key"], 3) < 0:
                data_index.append(pos - 1)
                continue

            # Previous value differs from the reference: flag it.
            prev_dist = dist(it_copy.loc[[pos - 1]].values, module)
            if (round(it_copy.loc[pos - 1]["key"], 3) != module
                    and prev_dist > 0):
                data_index.append(pos - 1)
                continue

            # Next value differs from the reference: flag this position.
            next_dist = dist(it_copy.loc[[pos + 1]].values, module)
            if (round(it_copy.loc[pos + 1]["key"], 3) != module
                    and next_dist > 0):
                data_index.append(pos)

    # A single candidate is discarded.
    if len(data_index) == 1:
        data_index.clear()
    # Shift by one to obtain the position in the original data.
    data_index = [i + 1 for i in data_index]

    # Dates of the candidates. The label itself is checked against the index
    # (previously the enumerate counter was checked), so labels that do not
    # exist in data are skipped instead of raising KeyError.
    data_error = [
        data.loc[val, 'dt_time'] for val in data_index if val in data.index
    ]

    # Extreme values: candidates not already zero, negative or missing.
    Elist = data_zero + data_minus + data_miss
    data_max = list(set(data_error).difference(set(Elist)))
    Elist = Elist + data_max

    # Normal rows: every date that is in none of the abnormal groups.
    data_normal = data[~data['dt_time'].isin(Elist)]

    # Wrap every group into the standard output columns.
    datas_normal = result(data_normal.dt_time.tolist(), data, df_period, 0)
    datas_zero = result(data_zero, data, df_period, 2)
    datas_miss = result(data_miss, data, df_period, 1)
    datas_minus = result(data_minus, data, df_period, 3)
    datas_max = result(data_max, data, df_period, 4)

    frames = [datas_normal, datas_zero, datas_miss, datas_minus, datas_max]
    if not datas_max.empty:
        # Second pass over the normal rows.
        bj = error_filter(datas_normal, datas_max)
        if not bj.empty:
            # Remove the rows found by the second pass from the normal set.
            datas_normal = pd.concat([datas_normal, bj]).drop_duplicates(
                subset=['dt_time'], keep=False)
            frames[0] = datas_normal
        frames.append(bj)

    # Merge all groups into one frame.
    list_all = pd.concat(frames, axis=0, ignore_index=True)
    list_all['id'] = field  # add the id column
    list_all = list_all.sort_values(by='dt_time', axis=0,
                                    ascending=True)  # order by time
    # The original called reset_index() and discarded its result.
    list_all.reset_index(drop=True, inplace=True)
    list_all['dt_time'] = list_all['dt_time'].apply(
        lambda x: datetime.datetime.strftime(x, '%Y/%m/%d'))

    list_all['dt_type'] = 'diff'
    # Correct the erroneous values, only when there is a non-zero value.
    if not data[(data.dt_val != 0)].dt_val.empty:
        list_all = replace_data_lg(list_all.copy())

    # Everything is written as strings.
    list_all = list_all.astype('str')
    db = getConfig()
    oracleUtil(
        db['username'] + ':' + db['password'] + '@' + db['url'] + '/' +
        db['sid'], list_all, 'error_out')
    print(datetime.datetime.now())