def pre_tz():
    """Build the feature table for today's trip ("tz") records.

    Reads the header-less CSV at ``tgl.new_tz`` and derives one feature row
    per input row.

    Returns:
        pandas.DataFrame with the columns ``xlid``, ``xlmc``, ``maxvalue``,
        ``minvalue``, ``avgvalue``, ``weather``, ``month``, ``day``, ``week``
        and ``tz``. When the file is smaller than 100 bytes it is treated as
        "no data today" and an empty frame with those columns is returned.
    """
    feature_columns = [
        'xlid', 'xlmc', 'maxvalue', 'minvalue', 'avgvalue', 'weather',
        'month', 'day', 'week', 'tz'
    ]

    tz_dir = tgl.new_tz
    if not os.path.exists(tz_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.trainDataTZPath + '!'
        ttu.err_state_write(print_str)
        # NOTE(review): if err_state_write returns instead of terminating,
        # the getsize call below raises FileNotFoundError - confirm intent.

    if os.path.getsize(tz_dir) < 100:
        print('今日无tz数据')
        return pd.DataFrame(columns=feature_columns)

    tz = pd.read_csv(tz_dir, encoding='utf-8', header=None)

    # Forward-fill missing weather values (8th column). The result is
    # assigned back explicitly: an in-place fillna on ``tz.iloc[:, 7]`` acts
    # on a temporary and is not guaranteed to reach the frame.
    weather_col = tz.columns[7]
    tz[weather_col] = tz[weather_col].ffill()
    tz['weather'] = tz[weather_col]

    # Telemetry current readings: per-row maximum, minimum and mean.
    # NOTE(review): the original comment speaks of s1..s96 (96 readings) but
    # this slice covers 95 columns - confirm the intended range.
    value = tz.iloc[:, 10:105]
    print(value.head())
    tz['maxvalue'] = value.max(axis=1)
    tz['minvalue'] = value.min(axis=1)
    tz['avgvalue'] = value.mean(axis=1)

    # Calendar features taken from the 5th column.
    when = pd.DatetimeIndex(tz.iloc[:, 4])
    tz['month'] = when.month
    tz['day'] = when.day
    tz['week'] = when.weekday

    # Line id and line name.
    tz['xlid'] = tz.iloc[:, 2]
    tz['xlmc'] = tz.iloc[:, 3]

    # Label: 1 = tripped, 0 = not tripped.
    tz['tz'] = 1

    return tz[feature_columns].copy()
def pre_zc():
    """Build the feature table for today's non-trip ("zc") records.

    Reads the header-less CSV at ``tgl.new_zc`` and returns a frame holding
    ``xlid``, ``xlmc``, ``maxvalue``, ``minvalue``, ``avgvalue``, ``weather``,
    ``month``, ``day``, ``week`` and ``tz``. A file smaller than 100 bytes is
    treated as "no data today" and yields an empty frame with those columns.
    """
    feature_columns = [
        'xlid', 'xlmc', 'maxvalue', 'minvalue', 'avgvalue', 'weather',
        'month', 'day', 'week', 'tz'
    ]

    # Report a missing input file through the shared error-state writer.
    zc_dir = tgl.new_zc
    if not os.path.exists(zc_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将未跳闸数据文件上传至路径:' + tgl.trainDataZCPath + '!'
        ttu.err_state_write(print_str)

    if os.path.getsize(zc_dir) < 100:
        print('今日无zc数据')
        return pd.DataFrame(columns=feature_columns)

    zc = pd.read_csv(zc_dir, encoding='utf-8', header=None)
    zc['xlid'] = zc.iloc[:, 0]
    zc['xlmc'] = zc.iloc[:, 1]

    # Telemetry current readings: per-row maximum, minimum and mean.
    # NOTE(review): the original comment speaks of s1..s96 (96 readings) but
    # this slice covers 95 columns - confirm the intended range.
    value = zc.iloc[:, 4:99]
    rows = [value.iloc[pos] for pos in range(value.shape[0])]
    stats = pd.DataFrame({
        'maxvalue': [row.max() for row in rows],
        'minvalue': [row.min() for row in rows],
        'avgvalue': [row.mean() for row in rows],
    })

    # Non-trip records get a constant weather code of 2.
    zc['weather'] = 2
    # Label: 1 = tripped, 0 = not tripped.
    zc['tz'] = 0

    zc = pd.concat([zc, stats], axis=1)

    # Calendar features taken from the 3rd column.
    stamps = pd.DatetimeIndex(zc.iloc[:, 2])
    zc['month'] = stamps.month
    zc['day'] = stamps.day
    zc['week'] = stamps.weekday

    return zc[feature_columns].copy()
def Dataload(tz_dir, zc_dir):
    """Load the trip (tz) and non-trip (zc) CSV files.

    Each path is checked for existence *before* it is read, so that a missing
    file is reported through ``ttu.err_state_write`` instead of the check
    being skipped by an exception raised from ``pd.read_csv``.

    Args:
        tz_dir: Path of the trip-data CSV (UTF-8, first row is the header).
        zc_dir: Path of the non-trip-data CSV (UTF-8, first row is the header).

    Returns:
        Tuple ``(tz, zc)`` of pandas DataFrames.

    Raises:
        FileNotFoundError: From ``pd.read_csv`` when a file is missing and
            ``ttu.err_state_write`` returns control.
    """
    print('Start time:', datetime.datetime.now())

    if not os.path.exists(tz_dir):
        # NOTE(review): the message names tgl.trainDataTZPath rather than the
        # tz_dir argument - confirm they always refer to the same file.
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.trainDataTZPath + '!'
        ttu.err_state_write(print_str)
    tz = pd.read_csv(tz_dir, encoding='utf-8')

    if not os.path.exists(zc_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将未跳闸数据文件上传至路径:' + tgl.trainDataZCPath + '!'
        ttu.err_state_write(print_str)
    zc = pd.read_csv(zc_dir, encoding='utf-8')

    return tz, zc
def pre_tz():
    """Prepare the training trip ("tz") records.

    Reads the header-less CSV at ``tgl.trainDataTZPath`` and appends derived
    columns to it.

    Returns:
        pandas.DataFrame holding every raw column plus, in this order,
        ``weather``, ``maxvalue``, ``minvalue``, ``avgvalue``, ``month``,
        ``day``, ``week``, ``xlmc`` and ``tz``.
    """
    tz_dir = tgl.trainDataTZPath
    if not os.path.exists(tz_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.trainDataTZPath + '!'
        ttu.err_state_write(print_str)
        # NOTE(review): if err_state_write returns instead of terminating,
        # read_csv below raises FileNotFoundError - confirm intent.

    tz = pd.read_csv(tz_dir, encoding='utf-8', header=None)

    # Forward-fill missing values in the 8th column and expose it as
    # 'weather'. The filled column is assigned back explicitly: an in-place
    # fillna on ``tz.iloc[:, 7]`` acts on a temporary and is not guaranteed
    # to reach the frame.
    weather_col = tz.columns[7]
    tz[weather_col] = tz[weather_col].ffill()
    tz['weather'] = tz[weather_col]

    # Telemetry current readings: per-row maximum, minimum and mean.
    value = tz.iloc[:, 11:107]
    tz['maxvalue'] = value.max(axis=1)
    tz['minvalue'] = value.min(axis=1)
    tz['avgvalue'] = value.mean(axis=1)

    # Calendar features taken from the 5th column.
    when = pd.DatetimeIndex(tz.iloc[:, 4])
    tz['month'] = when.month
    tz['day'] = when.day
    tz['week'] = when.weekday

    # Line name.
    tz['xlmc'] = tz.iloc[:, 3]

    # Trip label for every row of this file.
    tz['tz'] = 1
    return tz
def pre_zc():
    """Prepare the training non-trip ("zc") records.

    Reads the header-less CSV at ``tgl.trainDataZCPath`` and returns it with
    these columns appended, in order: ``weather``, ``tz``, ``maxvalue``,
    ``minvalue``, ``avgvalue``, ``month``, ``day``, ``week``, ``xlmc``.
    """
    zc_dir = tgl.trainDataZCPath
    if not os.path.exists(zc_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将未跳闸数据文件上传至路径:' + tgl.trainDataZCPath + '!'
        ttu.err_state_write(print_str)

    zc = pd.read_csv(zc_dir, encoding='utf-8', header=None)

    # Telemetry current readings: per-row maximum, minimum and mean.
    value = zc.iloc[:, 5:101]
    rows = [value.iloc[pos] for pos in range(value.shape[0])]
    stats = pd.DataFrame({
        'maxvalue': [row.max() for row in rows],
        'minvalue': [row.min() for row in rows],
        'avgvalue': [row.mean() for row in rows],
    })

    # Non-trip records get a constant weather code of 2 and the label 0.
    zc['weather'] = 2
    zc['tz'] = 0

    zc = pd.concat([zc, stats], axis=1)

    # Calendar features taken from the 3rd column.
    stamps = pd.DatetimeIndex(zc.iloc[:, 2])
    zc['month'] = stamps.month
    zc['day'] = stamps.day
    zc['week'] = stamps.weekday

    # Line name.
    zc['xlmc'] = zc.iloc[:, 1]
    return zc
def predictModel(input_dir, xlid):
    """Predict for each of the next 7 days and append the results to CSV.

    For day ``k`` (1..7) the model file
    ``tgl.saveModelPath + '/Model_<k>_RandomForestRegressor.model'`` is loaded
    with joblib and ``predict_proba`` is run on the feature columns
    ``month, day, week, weather, maxvalue, minvalue, avgvalue``.

    Args:
        input_dir: Path of the merged feature CSV (UTF-8, with header row).
        xlid: Line id to predict; the string ``'-1'`` means "all lines".

    Side effects:
        * Reads the base date from ``sys.argv[3]``.
        * ``xlid == '-1'``: appends to ``tgl.resultAllPath``.
        * otherwise: appends to
          ``tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath``, or
          reports error 103 via ``ttu.err_state_write`` when no row matches.

    NOTE(review): the value written as ``tz`` is column 0 of
    ``predict_proba``; which class that is depends on the model's class
    order - confirm it is the intended probability.
    """
    input_ = pd.read_csv(input_dir, encoding='utf-8')
    # Replace missing values with 0 before feeding the model.
    input_.fillna(0, inplace=True)
    print(input_.head())
    input_data = input_[[
        'month', 'day', 'week', 'weather', 'maxvalue', 'minvalue', 'avgvalue'
    ]]
    input_['xlid'] = input_['xlid'].apply(lambda x: x.strip())
    print(input_.shape)

    # Loop invariants: base date, output column order and the row selection.
    predict_date = parse(sys.argv[3])
    columns = ['time', 'predict_time', 'tz', 'xlmc', 'xlid']
    line_mask = input_['xlid'] == xlid

    # One dedicated model per forecast day.
    for delays in range(7):
        day_no = delays + 1
        end_time = (predict_date
                    + datetime.timedelta(days=day_no)).strftime('%Y-%m-%d')
        model_n = 'Model_' + str(day_no) + '_' + 'RandomForestRegressor' + '.model'
        model_dir = tgl.saveModelPath + '/' + model_n

        if xlid == '-1':
            print('预测' + '第' + str(day_no) + '天所有线路...')
            TZpredicts = joblib.load(model_dir)
            prediction = TZpredicts.predict_proba(input_data)
            result = pd.DataFrame({
                'xlmc': input_['xlmc'],
                'xlid': input_['xlid'],
                'tz': prediction[:, 0],
                'time': tgl.RUN_TIME,
                'predict_time': end_time
            })
            print('所有线路预测成功!!')
            result.to_csv(tgl.resultAllPath,
                          index=False,
                          header=None,
                          columns=columns,
                          mode='a')
            print('result has saved')
            continue

        print('预测' + xlid + '第' + str(day_no) + '天结果')
        TZpredicts = joblib.load(model_dir)
        if not line_mask.any():
            esw_err = 'Error code 103:输入线路id错误或此id数据不存在!请重新输入'
            ttu.err_state_write(esw_err)
            continue

        prediction = TZpredicts.predict_proba(input_data[line_mask])
        result = pd.DataFrame({
            'xlmc': input_[line_mask].iloc[:, 0],
            'xlid': xlid,
            'tz': prediction[:, 0],
            'time': tgl.RUN_TIME,
            'predict_time': end_time
        })
        # NOTE(review): each predicted value is replaced by
        # |value - uniform(0, 1)|, which makes the stored number random -
        # confirm this is really intended.
        result['tz'] = [abs(p - random.random()) for p in result['tz']]
        # drop_duplicates returns a new frame; keep it so the
        # de-duplication actually takes effect.
        result = result.drop_duplicates()
        print(xlid + '预测成功!!')
        result.to_csv(tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath,
                      index=False,
                      header=None,
                      columns=columns,
                      mode='a')
if __name__ == '__main__':
    # Line id to predict, taken from the second command-line argument.
    xlid = str(sys.argv[2])

    # Prediction only runs when a trained model file already exists.
    if os.path.exists(tgl.saveModelPath + tgl.model_name):
        print('模型已经存在,可直接预测')

        # Merge the prepared data sets into the prediction input file.
        tmm.combine_all_data()

        # Report a missing prediction input through the error-state writer.
        if not os.path.exists(tgl.input_dir):
            print_str = 'Error code 103:缺少预测文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.input_dir + '!'
            ttu.err_state_write(print_str)

        # Load the merged data (not used further in the visible code).
        input_ = pd.read_csv(tgl.input_dir, encoding='utf-8', header=None)

        # Run the prediction, then record the model run.
        tmm.predictModel(tgl.input_dir, xlid)
        tmm.modelrecord()

        # Status text for the state table ("running normally").
        # NOTE(review): nothing visible consumes this value - the block
        # looks truncated here.
        print_str = '运行正常'
def predictModel(input_dir, xlid):
    """Predict for the day after ``sys.argv[3]`` and write the result CSV.

    Loads the model at ``tgl.saveModelPath + tgl.model_name`` with joblib and
    runs ``predict_proba`` on the feature columns
    ``month, day, week, weather, maxvalue, minvalue, avgvalue``.

    Args:
        input_dir: Path of the merged feature CSV (UTF-8, with header row).
        xlid: Line id to predict; the string ``'-1'`` means "all lines".

    Side effects:
        * ``xlid == '-1'``: overwrites ``tgl.resultAllPath``.
        * otherwise: overwrites
          ``tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath``, or
          reports error 103 via ``ttu.err_state_write`` when no row matches.
    """
    input_ = pd.read_csv(input_dir, encoding='utf-8')
    # Replace missing values with 0 before building the feature matrix.
    input_.fillna(0, inplace=True)
    feature_names = [
        'month', 'day', 'week', 'weather', 'maxvalue', 'minvalue', 'avgvalue'
    ]
    input_data = input_[feature_names]
    print(input_.shape)
    input_['xlid'] = input_['xlid'].apply(lambda raw: raw.strip())

    # The forecast date is the day after the date given on the command line.
    next_day = parse(sys.argv[3]) + datetime.timedelta(days=+1)
    end_time = next_day.strftime('%Y-%m-%d')
    columns = ['time', 'predict_time', 'tzgl', 'xlmc', 'xlid']

    if xlid == '-1':
        print('预测所有线路...')
        TZpredicts = joblib.load(tgl.saveModelPath + tgl.model_name)
        print(input_data.shape)
        print(input_data.isnull().sum().sum())
        prediction = TZpredicts.predict_proba(input_data)
        print(prediction)
        result = pd.DataFrame({
            'xlmc': input_.iloc[:, 0],
            'xlid': input_['xlid'],
            'tzgl': prediction[:, 0],
            'time': tgl.RUN_TIME,
            'predict_time': end_time
        })
        print('所有线路预测成功!!')
        result.to_csv(tgl.resultAllPath,
                      index=False,
                      header=None,
                      columns=columns)
        print('result has saved')
        return

    print('预测' + xlid)
    TZpredicts = joblib.load(tgl.saveModelPath + tgl.model_name)
    # Diagnostic output about the requested id and the ids present.
    print(xlid + '!')
    print('input:' + str(len(xlid)))
    print(input_.groupby('xlid')[['xlid']].count())

    selected = input_['xlid'] == xlid
    line_features = input_data[selected]
    if line_features.shape[0] == 0:
        print('bc')
        esw_err = 'Error code 103:输入线路名称错误或此线路不存在!请重新输入'
        ttu.err_state_write(esw_err)
        return

    print('zc')
    prediction = TZpredicts.predict_proba(line_features)
    result = pd.DataFrame({
        'xlmc': input_[selected].iloc[:, 0],
        'xlid': xlid,
        'tzgl': prediction[:, 0],
        'time': tgl.RUN_TIME,
        'predict_time': end_time
    })
    print(xlid + '预测成功!!')
    # Each value becomes |value - uniform(0, 1)|, one random draw per row in
    # row order.
    # NOTE(review): this randomises the stored number - confirm intent.
    result['tzgl'] = [
        abs(current - float(random.random())) for current in result['tzgl']
    ]
    result.to_csv(tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath,
                  index=False,
                  header=None,
                  columns=columns)