def test1():
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler  # missing in the original

    # -- Prepare the test data
    # icao, start_month and end_month are module-level globals
    test = pickle.load(
        open('/Users/makino/PycharmProjects/SkyCC/data/skynet/test_%s.pkl' % icao, 'rb'))
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], r'[-\s:]'
    )[['month', 'day', 'hour', 'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)

    keys = skyds.get_init_features() + skyds.get_init_target()
    test = test[keys]

    X_test = test.iloc[:, :-1]
    y_test = test.iloc[:, -1]

    X_test = X_test[(X_test['month'] == start_month) | (X_test['month'] == end_month)]
    y_test = y_test.loc[X_test.index]

    ss = StandardScaler()
    X_test = ss.fit_transform(X_test)
    y_test = y_test.values
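
# A plain-pandas sketch of the date splitting done by NWPFrame.split_strcol
# above (assumption: split_strcol behaves like str.split on the given regex,
# naming the resulting columns). Runnable standalone:
#
#     import pandas as pd
#
#     _df = pd.DataFrame({'date': ['2017-01-02 03:00']})
#     _parts = _df['date'].str.split(r'[-\s:]', expand=True)
#     _parts.columns = ['year', 'month', 'day', 'hour', 'min']
#     print(_parts.astype(int))  # -> 2017, 1, 2, 3, 0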
def predict_by_period(X, clfs, icao, smooth=False, confidence=False):
    import copy  # missing in the original
    import pandas as pd  # missing in the original
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from skynet.nwp2d import NWPFrame

    # predict, adapt_visibility and W are module-level names
    pred = {}
    for i_term, key in enumerate(X):
        ss = StandardScaler()
        x = X[key]
        fets = skyds.get_init_features('long')
        x = x[fets]
        x = NWPFrame(ss.fit_transform(x), columns=x.keys())

        # Prepare the model
        if confidence:
            p, c = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = NWPFrame(copy.deepcopy(X[key][["date"]]))
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            c["visibility_rank"] = p
            pred[key] = pd.concat([pred[key], c], axis=1)
            # pred[key].index = NWPFrame(pred[key].strtime_to_datetime('date', fmt='%Y-%m-%d %H:%M'))
        else:
            p = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = copy.deepcopy(X[key][["date"]])
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            pred[key]["visibility_rank"] = p
            # pred[key].index = pred[key].strtime_to_datetime('date', fmt='%Y%m%d%H%M')

    return pred
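
# adapt_visibility is not defined in this file. A minimal sketch of what the
# call sites imply, assuming it clips predicted visibility ranks to
# [vmin, vmax]; the real rank-to-distance conversion lives elsewhere in skynet.
#
#     import numpy as np
#
#     def _adapt_visibility_sketch(p, vmin=0, vmax=8):
#         # hypothetical: clamp rank predictions to the valid rank range
#         return np.clip(np.asarray(p), vmin, vmax)
#
#     print(_adapt_visibility_sketch([-1.0, 3.0, 10.0]))  # -> [0. 3. 8.]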
def msm_airport_xy(icao, metar_dir, msm_dir, save_dir):
    import re
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds

    # Load METAR data
    with open('%s/head.txt' % metar_dir, 'r') as f:
        header = f.read()
    header = header.split(sep=',')

    data15 = pd.read_csv('%s/2015/%s.txt' % (metar_dir, icao), sep=',')
    data16 = pd.read_csv('%s/2016/%s.txt' % (metar_dir, icao), sep=',')
    data17 = pd.read_csv('%s/2017/%s.txt' % (metar_dir, icao), sep=',', names=header)

    metar_data = pd.concat([data15, data16, data17])
    metar_data = npd.NWPFrame(metar_data)
    metar_data.strtime_to_datetime('date', '%Y%m%d%H%M%S', inplace=True)
    metar_data.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    metar_data.drop_duplicates('date', inplace=True)
    metar_data.index = metar_data['date'].values

    metar_keys = ['date', 'visibility', 'str_cloud']
    metar_data = metar_data[metar_keys]
    metar_data['visibility_rank'] = skyds.to_visrank(metar_data['visibility'])

    # Load MSM data
    msm_data = pd.read_csv('%s/%s.csv' % (msm_dir, icao))
    msm_data.rename(columns={'Unnamed: 0': 'date'}, inplace=True)
    msm_data.index = msm_data['date'].values
    msm_data.sort_index(inplace=True)

    fets = skyds.get_init_features()
    target = skyds.get_init_target()

    X = npd.NWPFrame(pd.concat([msm_data[fets], metar_data[target]], axis=1))
    X.dropna(inplace=True)
    X.strtime_to_datetime('date', '%Y-%m-%d %H:%M', inplace=True)
    X.datetime_to_strtime('date', '%Y%m%d%H%M', inplace=True)
    X = X[fets + target]

    # Everything except 2017 becomes training data
    date = [d for d in X.index if not re.match('2017', d)]
    train = npd.NWPFrame(X.loc[date])
    train['date'] = train.index
    df_date = train.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'],
        pattern=r'[-\s:]'
    )[['year', 'month', 'day', 'hour', 'min']]
    train = pd.concat([df_date, train], axis=1)
    train.drop('date', axis=1, inplace=True)

    train.to_csv('%s/%s.csv' % (save_dir, icao), index=False)
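
# skyds.to_visrank maps METAR visibility distances to discrete ranks; given
# that the predictions above live on a 0-8 scale, a sketch with np.digitize
# illustrates the idea. The bin edges below are hypothetical; the real
# thresholds are defined inside skynet.datasets.
#
#     import numpy as np
#
#     def _to_visrank_sketch(vis_m, edges=(800, 1600, 3200, 4800, 6000, 7000, 8000, 9000)):
#         # hypothetical edges in meters; returns ranks 0..8
#         return np.digitize(np.asarray(vis_m), bins=edges)
#
#     print(_to_visrank_sketch([500, 5000, 9999]))  # -> [0 4 8]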
def test1():
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.manifold import TSNE

    icao = 'RJOT'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))

    fets = skyds.get_init_features()
    target = skyds.get_init_target()
    data = data[fets + target]

    spdata = skyds.convert.split_time_series(data, data['month'], date_fmt='%m')
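
# What split_time_series is doing above, sketched with plain pandas
# (assumption: with date_fmt='%m' it returns a dict keyed by the formatted
# month value, one frame per group):
#
#     import pandas as pd
#
#     _d = pd.DataFrame({'month': [1, 1, 2], 'v': [0.1, 0.2, 0.3]})
#     _sp = {'%02d' % m: g for m, g in _d.groupby('month')}
#     print(list(_sp))  # -> ['01', '02']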
def normal(data):
    import pandas as pd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler

    fets = skyds.get_init_features()
    target = skyds.get_init_target()
    data = data[fets + target]

    date = data['month']
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    ss = StandardScaler()
    X = pd.DataFrame(ss.fit_transform(X.values), columns=X.keys())

    spX = skyds.convert.split_time_series(X, date, date_fmt='%m')
    spy = skyds.convert.split_time_series(y, date, date_fmt='%m')

    return spX, spy
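
# StandardScaler.fit_transform returns a bare ndarray; wrapping it back into a
# DataFrame with the original columns, as normal() does, keeps feature names
# addressable downstream. Runnable demonstration:
#
#     import pandas as pd
#     from sklearn.preprocessing import StandardScaler
#
#     _X = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0]})
#     _Xs = pd.DataFrame(StandardScaler().fit_transform(_X.values), columns=_X.keys())
#     print(_Xs.columns.tolist(), _Xs.mean().round(6).tolist())  # names kept, means ~0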
def main():
    import pickle
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler

    icao = 'RJCC'

    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao

    data_dir = '%s/skynet' % DATA_DIR
    data_name = 'test_%s' % icao

    clfs = pickle.load(open('%s/%s.pkl' % (model_dir, model_name), 'rb'))
    test = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))

    fets = skyds.get_init_features()
    target = skyds.get_init_target()
    test = test[fets + target]

    sptest = skyds.convert.split_time_series(test, test['month'], date_fmt='%m')

    for key, clf in clfs.items():
        X = sptest[key].iloc[:, :-1]
        y = sptest[key].iloc[:, -1]

        ss = StandardScaler()
        X = ss.fit_transform(X)
        y = y.values

        p = clf.predict(X)

        plt.figure()
        plt.plot(y)
        plt.plot(p)
        plt.show()
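
# Note that the loop above re-fits StandardScaler on each test split. If the
# training-time scaler were saved alongside each clf, the usual scikit-learn
# pattern is transform-only at prediction time. Sketch (the training fit below
# is a random stand-in):
#
#     import numpy as np
#     from sklearn.preprocessing import StandardScaler
#
#     ss_train = StandardScaler().fit(np.random.rand(10, 3))  # done once at training
#     X_new = ss_train.transform(np.random.rand(4, 3))        # no re-fit on test data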
def Vis_Pred(model, contxt, lclid, test_dir, input_dir, fit_dir, pred_dir, errfile):
    import os
    import sys
    import copy
    import csv
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from pathlib import Path

    myname = sys.argv[0]
    print(model)

    csv_test = '%s/%s-%s.csv' % (test_dir, contxt, lclid)
    csv_input = '%s/%s-%s.vis.csv' % (input_dir, contxt, lclid)
    fitfile = '%s/%s-%s.vis.pkl' % (fit_dir, contxt, lclid)
    predfile = '%s/%s-%s.vis.csv' % (pred_dir, contxt, lclid)
    conffile = '%s/confidence_factor/%s-%s.vis.csv' % (pred_dir, contxt, lclid)

    if not os.path.exists(csv_test):
        print("{:s}: [Error] {:s} is not found !".format(myname, csv_test))
        if not os.path.exists(errfile):
            Path(errfile).touch()
        return

    X = pd.read_csv(csv_test)
    X = npd.NWPFrame(X)

    # --- Reading Fitting File & Input File (If Not Existing -> -9999.)
    if not os.path.exists(fitfile) or not os.path.exists(csv_input):
        print("{:s}: [Checked] {:s} or {:s} is not found !".format(
            myname, fitfile, csv_input))

        PRED = []
        for k in range(len(X)):
            pred = [-9999.]
            PRED = PRED + pred

        # - Output (all -9999.)
        outdata = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
        outdata['SKYNET-VIS'] = PRED
        outdata.to_csv(
            predfile,
            columns=['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'SKYNET-VIS'],  # was 'ARC-GUSTS', which is not a column of outdata and would raise KeyError
            index=False, header=True)

        # - Output (num of train -> 0)
        f = open(predfile, 'a')
        csv.writer(f, lineterminator='\n').writerow(['FOOT:TRAIN_NUM', 0])
        f.close()
        return

    df_date = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]

    date_keys = ['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'MIN']
    X['MIN'] = [0] * len(X)
    for key in date_keys:
        if not key == 'HEAD:YEAR':
            X[key] = ['%02d' % int(d) for d in X[key]]
    X.merge_strcol(date_keys, 'date', inplace=True)
    X.drop(date_keys, axis=1, inplace=True)
    # print(X)

    wni_code = skyds.get_init_features('wni')
    X = X[wni_code]

    long_code = skyds.get_init_features('long')
    X.columns = long_code

    vt = len(X)
    pool = skyds.read_csv(csv_input)[long_code]
    sppool = skyds.convert.split_time_series(pool, date=pool["date"].values,
                                             level="month", period=2, index_date=True)
    month_key_info = get_month_key(X['date'][0], period=2)
    X = pd.concat([X, sppool[month_key_info[1]]])

    ss = StandardScaler()
    X = npd.NWPFrame(ss.fit_transform(X), columns=X.keys())
    X = X.iloc[:vt]

    clfs = pickle.load(open(fitfile, 'rb'))[month_key_info[1]]
    p, c = predict(X, clfs, W[lclid][month_key_info[0]], smooth=False, confidence=True)
    vis_pred = adapt_visibility(p)

    vis = npd.NWPFrame(copy.deepcopy(df_date))
    vis['SKYNET-VIS'] = vis_pred
    # vis.rename(columns={'HEAD:YEAR': 'YEAR'}, inplace=True)

    c = pd.concat([copy.deepcopy(df_date), c], axis=1)
    # c.rename(columns={'HEAD:YEAR': 'YEAR'}, inplace=True)

    print(os.path.dirname(predfile))
    vis.to_csv(predfile, index=False)
    c.to_csv(conffile, index=False)
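
# get_month_key is not defined in this file. A sketch consistent with how it
# is used above: given the merged date string and a period length in months,
# return (term_index, 'month:a-b') matching the period keys produced by
# split_time_series with level="month", period=2. The 'YYYYMM...' prefix
# assumption below is hypothetical; it depends on how merge_strcol joins
# the date columns.
#
#     def _get_month_key_sketch(date_str, period=2):
#         month = int(date_str[4:6])  # assumes a 'YYYYMM...' layout
#         term = (month - 1) // period
#         return term, 'month:%d-%d' % (term * period + 1, (term + 1) * period)
#
#     print(_get_month_key_sketch('201701020300'))  # -> (0, 'month:1-2')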
def main():
    import os  # missing in the original
    import pandas as pd  # missing in the original
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from skynet import USER_DIR, DATA_DIR
    from skynet.datasets import convert

    n_clfs = [20, 20, 20, 20, 20, 20]

    target = skyds.get_init_target()
    icao = 'RJFK'
    # other airports: 'RJSS', 'RJTT', 'ROAH', 'RJOC', 'RJOO', 'RJCH',
    # 'RJFF', 'RJFK', 'RJGG', 'RJNK', 'RJOA', 'RJOT'

    mlalgo = 'stacking'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    test = skyds.read_csv('%s/skynet/test_%s.csv' % (DATA_DIR, icao))

    # Split the data along the time axis
    sptrain = convert.split_time_series(train, train['date'], level="month", period=2)
    sptest = convert.split_time_series(test, test['date'], level="month", period=2)

    ss = StandardScaler()

    model_dir = '%s/PycharmProjects/SkyCC/trained_models' % USER_DIR
    period_keys = [
        'month:1-2', 'month:3-4', 'month:5-6',
        'month:7-8', 'month:9-10', 'month:11-12'
    ]
    init_fets = skyds.get_init_features(code='long')

    for i_term, key in enumerate(period_keys):
        os.makedirs('%s/%s/%s/%s' % (model_dir, icao, mlalgo, key), exist_ok=True)

        # fets = pearson_correlation(sptrain[key][init_fets], sptrain[key][target], depth=30)
        fets = init_fets

        X_train = sptrain[key][fets]
        X_train = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.keys())
        y_train = sptrain[key][target]
        X_train, y_train = convert.balanced(X_train, y_train)

        X_test = sptest[key][fets]
        X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.keys())
        y_test = sptest[key][target]

        save_dir = "%s/%s/%s/%s" % (model_dir, icao, mlalgo, key)
        p_n, score = fit_n_models(mlalgo, n_clfs[i_term],
                                  X_train, y_train, X_test, y_test, save_dir)
        p = p_n.mean(axis=1)
        score = score.mean()
        print("f1 mean", score)

        plt.figure()
        plt.plot(y_test)
        plt.plot(p)
        plt.show()
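
# fit_n_models is defined elsewhere in the project. A minimal sketch consistent
# with the call above: train n classifiers, pickle each into save_dir, and
# return the per-model test predictions plus their f1 scores. RandomForest is
# a stand-in for the real 'stacking' learner selected by mlalgo.
#
#     import os, pickle
#     import numpy as np
#     import pandas as pd
#     from sklearn.ensemble import RandomForestClassifier
#     from sklearn.metrics import f1_score
#
#     def _fit_n_models_sketch(mlalgo, n, X_train, y_train, X_test, y_test, save_dir):
#         preds, scores = {}, []
#         for i in range(n):
#             clf = RandomForestClassifier(random_state=i).fit(X_train, np.ravel(y_train))
#             p = clf.predict(X_test)
#             preds[i] = p
#             scores.append(f1_score(np.ravel(y_test), p, average='macro'))
#             pickle.dump(clf, open(os.path.join(save_dir, 'clf%02d.pkl' % i), 'wb'))
#         return pd.DataFrame(preds), np.array(scores)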
def main():
    import numpy as np  # missing in the original
    import pandas as pd  # missing in the original
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import f1_score
    from sklearn.linear_model import LogisticRegression  # missing in the original
    from mlxtend.classifier import StackingClassifier  # missing in the original
    # SkySVM, SkyStacking and preprocess are assumed to be imported at module
    # level from the skynet package

    icao = "RJAA"

    train_data_dir = '%s/MSM/airport.process' % DATA_DIR
    test_data_dir = '%s/skynet' % DATA_DIR

    train = skyds.read_csv('%s/%s.csv' % (train_data_dir, icao))
    test = skyds.read_pkl('%s/test_%s.pkl' % (test_data_dir, icao))

    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], r'[-\s:]'
    )[['month', 'day', 'hour', 'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)

    fs = skyds.get_init_features()
    target = skyds.get_init_target()

    train = train[fs + target]
    test = test[fs + target]

    train = train[(train['month'] == 1) | (train['month'] == 2)]
    test = test[(test['month'] == 1) | (test['month'] == 2)]

    X = train.iloc[:, :-1]
    y = train.iloc[:, -1]

    ss = StandardScaler()
    X = ss.fit_transform(X)
    y = y.values

    X, y = skyds.convert.balanced(X, y)

    spX, spy = skyds.convert.split_blocks(X, y, n_folds=5)
    print(spX)
    # the next call overwrites the split above, as in the original
    spX, spy = preprocess.split(X, y, n_folds=5)

    # fold 0 is held out as the test split
    X = pd.concat([spX[n] for n in spX if n != 0]).reset_index(drop=True)
    y = pd.concat([spy[n] for n in spy if n != 0]).reset_index(drop=True)
    X_test = spX[0].reset_index(drop=True)
    y_test = spy[0].reset_index(drop=True)

    from sklearn.ensemble import RandomForestClassifier
    clf1 = RandomForestClassifier(max_features=2)
    clf2 = SkySVM()
    meta = LogisticRegression()

    # Training
    # (Note) the folds have not been re-balanced
    sta = SkyStacking((clf1, clf2), meta)
    sta.fit(X, y)
    p = sta.predict(X_test)

    clf1.fit(X.values, y.values[:, 0])
    print(np.array(X.keys())[np.argsort(clf1.feature_importances_)[::-1]])
    p_rf = clf1.predict(X_test.values)

    # Stacking via mlxtend
    sc = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    sc.fit(X.values, y.values[:, 0])
    p_sc = sc.predict(X_test.values)

    # Binarize: visibility rank <= 1 becomes the positive class
    y_test = np.where(y_test.values[:, 0] > 1, 0, 1)
    p = np.where(p > 1, 0, 1)
    p_rf = np.where(p_rf > 1, 0, 1)
    p_sc = np.where(p_sc > 1, 0, 1)

    f1 = f1_score(y_true=y_test, y_pred=p)
    print("stacking", f1)

    f1_rf = f1_score(y_true=y_test, y_pred=p_rf)
    print("random forest", f1_rf)

    f1_sc = f1_score(y_true=y_test, y_pred=p_sc)
    print("stacked classifier", f1_sc)
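
# skyds.convert.balanced is part of skynet; a sketch of the usual idea
# (assumption: it equalizes class counts by resampling the minority classes
# up to the majority count; X is assumed to be an ndarray here):
#
#     import numpy as np
#
#     def _balanced_sketch(X, y):
#         y = np.ravel(y)
#         classes, counts = np.unique(y, return_counts=True)
#         n_max = counts.max()
#         idx = np.concatenate([
#             np.random.choice(np.where(y == c)[0], n_max, replace=True)
#             for c in classes
#         ])
#         return X[idx], y[idx]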