def show_test(test, predict_test):
    """Evaluate the test-set predictions and hand them to ``plot_ans``.

    Args:
        test: Table-like object indexed by column name; the ``'label'``,
            ``'timestamp'`` and ``'value'`` columns are read.
        predict_test: Predictions compared against ``test['label']``.

    Returns:
        None. The return value of ``evalute_delay`` is discarded.
    """
    labels = test['label']
    evalute_delay(labels, predict_test)

    # One 'y' column holding whatever truth_predict derives from the pair,
    # followed by the raw series columns copied from the test frame.
    frame = pd.DataFrame({'y': truth_predict(labels, predict_test)})
    for column in ('timestamp', 'value'):
        frame[column] = test[column]

    plot_ans(frame, ratio=1)
def show_train(y_train, df):
    """Predict on the features stored in ``formal.csv`` and plot the outcome.

    The feature matrix is read from ``'formal.csv'`` with the ``'label'``
    column dropped and then column index 1 of the remaining matrix removed.
    Prediction uses the module-level ``rf`` object (it is not a parameter).

    Args:
        y_train: Ground-truth labels compared against the predictions.
        df: Table-like object supplying the ``'timestamp'`` and ``'value'``
            columns attached to the plotted frame.

    Returns:
        None. The return value of ``evalute_delay`` is discarded.
    """
    features = pd.read_csv('formal.csv').drop(['label'], axis=1).values
    features = np.delete(features, 1, axis=1)
    predict_train = rf.predict(features)

    evalute_delay(y_train, predict_train)

    frame = pd.DataFrame({'y': truth_predict(y_train, predict_train)})
    for column in ('timestamp', 'value'):
        frame[column] = df[column]

    plot_ans(frame, ratio=1)
def _roubust_window(t, stamp, w):
    """Assemble the sample window passed to the residual detector.

    The result concatenates, in this order, the slices returned by
    ``t.get_series`` for: one week before ``stamp``, one week before plus
    three hours, one day before, one day before plus three hours, and
    ``stamp`` itself.

    Args:
        t: Series container exposing ``get_series(timestamp, count)``.
        stamp: Timestamp of the point being scored.
        w: Window size forwarded to ``get_series`` (``w + 1`` for the
            slices anchored exactly on a reference timestamp, ``w`` for the
            slices shifted by three hours).

    Returns:
        A numpy array built from the concatenated slices.
    """
    day = 24 * 3600
    week = day * 7
    three_hours = 3600 * 3

    # Construct the data blocks; the fetch order matches the original code.
    w_now = t.get_series(stamp, w + 1)
    w_yest1 = t.get_series(stamp - day, w + 1)
    # Shifted later by three hours.
    w_yest2 = t.get_series(stamp - day + three_hours, w)
    w_last_week1 = t.get_series(stamp - week, w + 1)
    w_last_week2 = t.get_series(stamp - week + three_hours, w)

    # The returned objects are extended in place, exactly as before.
    w_yest1.extend(w_yest2)
    w_last_week1.extend(w_last_week2)
    w_last_week1.extend(w_yest1)
    w_last_week1.extend(w_now)
    return np.array(w_last_week1)


def benchmark_roubust_ksigma(num,
                             data_dir='E:/javacode/AIOPS/data/series/',
                             log_dir='log/'):
    """Run the robust-residual k-sigma detector over one KPI and log scores.

    Reads ``<data_dir><num>_train.csv`` and ``<data_dir><num>_test.csv``,
    scores the test timestamps from index 1 onwards with
    ``roubustResidual.roubust_residual_ksigma``, evaluates the result with
    ``evalute_delay(..., delay=7)`` and writes a one-row CSV to
    ``<log_dir><num> roubust_ksigma.csv``.

    Args:
        num: KPI identifier used in the file names. It is converted with
            ``str`` once, so non-string ids no longer fail on the input
            paths while working on the output path.
        data_dir: Prefix prepended to the input file names. Defaults to the
            previously hard-coded location.
        log_dir: Prefix prepended to the output file name. Defaults to the
            previously hard-coded ``'log/'``.

    Returns:
        None.
    """
    kpi_id = str(num)
    print(num)

    train = pd.read_csv(data_dir + kpi_id + '_train.csv')
    t = TSlist(train)
    test = pd.read_csv(data_dir + kpi_id + '_test.csv')
    t.append_df(test)
    t.fill_missed_median()

    timestamp = test['timestamp'].values
    span = timestamp[1] - timestamp[0]
    # Number of samples in three hours.
    # NOTE(review): this relies on the interpreter's `/` semantics; the file
    # uses Python 2 print statements, where int / int truncates - confirm
    # before porting to Python 3.
    w = 3600 * 3 / span
    print('%s %s' % ('sample num in each hour:', w / 3))

    total = len(timestamp)
    y = []
    # NOTE(review): the loop starts at 1, so `y` holds total - 1 predictions
    # while test['label'] has `total` entries - confirm evalute_delay
    # tolerates (or expects) this offset.
    for i in range(1, total):
        if i % 5000 == 0:
            print('%s %s' % (i * 100.0 / total, "%"))

        X_formal = _roubust_window(t, timestamp[i], w)

        # Detector under test. Alternatives previously tried at this point:
        # ewmaResidual.ewma_residual_ksigma(X_formal) and
        # ewmaResidual.ewma_residual_lof(X_formal).
        ans = roubustResidual.roubust_residual_ksigma(X_formal, w)
        y.append(ans)

    tp, tn, fp, fn, precison, recall, f1 = evalute_delay(
        test['label'], y, delay=7)

    log = pd.DataFrame(
        [[num, tp, tn, fp, fn, precison, recall, f1]],
        columns=[
            'kpi id', 'tp', 'tn', 'fp', 'fn', 'precison', 'recall', 'fscore'
        ])
    # The ewma/lof variant used the suffix ' ewma_lof.csv' instead.
    log.to_csv(log_dir + kpi_id + ' roubust_ksigma.csv')
'minus_mean_now_yes', 'minus_std_now_past', 'minus_std_now_yes', 'r_mean_now_past', 'r_mean_now_yes', '1', '2', '3', '4', '5', '6', '7', '8', 'ewma', 'pre_minus', 'pre_rate', 'yes_minus', 'yes_rate', 'label' ] for num in numbers: print num df = pd.read_csv(file_name + num + '_train.csv') span = df['timestamp'][1] - df['timestamp'][0] ts = TSlist(df) df = df[3600 * 24 / span + 15:] print 'start to fill missed value' ts.fill_missed_median() print 'filling over' print 'start to train' #---------- rf, scale = trainScaleSmote(df, ts, columns) print 'training done' test = pd.read_csv(file_name + num + '_test.csv') ts.append_df(test) ts.fill_missed_median() predict_test = benchmark(ts, test, rf, scale) precison, recall, f1 = evalute_delay(test['label'], predict_test) log.append([num, precison, recall, f1]) log_tmp = pd.DataFrame(log, columns=['kpi id', 'precison', 'recall', 'fscore']) log_tmp.to_csv(num + 'scale_smote.csv') #log = pd.DataFrame(log, columns=['kpi id','precison','recall','fscore']) #log.to_csv('benchmark_dumpT.csv')
from TSlist import TSlist

# Train on KPI 7 and score its test split. The module-level names assigned
# here (df, span, ts, rf, columns, ...) are kept unchanged because other code
# in the file reads some of them as globals (e.g. show_train uses `rf`).
df = pd.read_csv('data/series/7_train.csv')
_stamps = df['timestamp']
span = _stamps[1] - _stamps[0]
ts = TSlist(df)

# Skip the leading rows: one day's worth of samples plus 5.
_skip_rows = 3600 * 24 / span + 5
df = df[_skip_rows:]

print('start to fill missed value')
ts.fill_missed_median()
print('filling over')

y_train = df['label'].values
mark = extract_Tsamples(y_train)
columns = [
    'value',
    'mean_now',
    'std_now',
    'minus_mean_now_past',
    'minus_mean_now_yes',
    'minus_std_now_past',
    'minus_std_now_yes',
    'r_mean_now_past',
    'r_mean_now_yes',
    '1', '2', '3', '4', '5', '6', '7', '8',
    'ewma',
    'pre_minus',
    'pre_rate',
    'yes_minus',
    'yes_rate',
    'label',
]

print('start to train')
rf = trainDumpT(df, ts, columns, mark, ratio=3)
# Optional visual check of the training fit: show_train(y_train, df)
print('training done')

test = pd.read_csv('data/series/7_test.csv')
ts.append_df(test)
ts.fill_missed_median()
predict_test = benchmark(ts, test, rf)
# Optional visual check of the test predictions: show_test(test, predict_test)
print(evalute_delay(test['label'], predict_test))