def main():
    import pandas as pd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from skynet.nwp2d import NWPFrame

    icao = 'RJCC'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    # data.drop(['year', 'month', 'day', 'hour', 'min'], axis=1, inplace=True)

    # Normalize the date column: parse 'YYYYmmddHHMM' and rewrite it as
    # 'YYYY-mm-dd HH:MM'.
    data = NWPFrame(data)
    data.strtime_to_datetime(date_key='date', fmt='%Y%m%d%H%M', inplace=True)
    data.datetime_to_strtime(date_key='date', fmt='%Y-%m-%d %H:%M', inplace=True)

    # Split the formatted date string into separate date-part columns and
    # prepend them to the frame.
    df_date = data.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], pattern='[-: ]'
    )
    df_date = df_date[['year', 'month', 'day', 'hour', 'min']]
    data = pd.concat([df_date, data], axis=1)
    print(data)
def main():
    import pickle
    import matplotlib.pyplot as plt
    import pandas as pd
    import skynet.datasets as skyds
    import skynet.testing as skytest
    from skynet import DATA_DIR
    from skynet.mlcore.feature_selection.filter import pearson_correlation

    # icao = 'RJFK'
    # icao = 'RJFT'
    # icao = 'RJOT'
    # icao = 'RJCC'
    icao = 'RJAA'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    month_keys = [
        'month:1-2', 'month:3-4', 'month:5-6',
        'month:7-8', 'month:9-10', 'month:11-12'
    ]

    # Prepare the training data.
    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    spX_train, spy_train = skytest.preprocessing.normal(train)

    # -- Prepare the test data.
    test = pd.read_csv(
        '/Users/makino/PycharmProjects/SkyCC/data/skynet/test_%s.csv' % icao,
        sep=','
    )
    spX_test, spy_test = skytest.preprocessing.normal(test)

    clfs = {}
    for key in month_keys:
        # Keep the 30 features most correlated with the target.
        fets = pearson_correlation(spX_train[key], spy_train[key], depth=30)

        X_train, y_train = spX_train[key][fets].values, spy_train[key].values
        X_train, y_train = skyds.convert.balanced(X_train, y_train)
        X_test, y_test = spX_test[key][fets].values, spy_test[key].values

        clfs[key] = test1(X_train, y_train, X_test, y_test)

    pickle.dump(clfs, open('%s/%s.pkl' % (model_dir, model_name), 'wb'))

    plt.show()
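
# The `test1` called above is not defined in this file. A minimal sketch of
# what it is assumed to do, inferred only from its call site: its return
# value is pickled as a fitted model, and main() ends with plt.show(), so it
# presumably trains one classifier per period, plots test predictions, and
# returns the model. The RandomForestClassifier and its settings here are
# illustrative assumptions, not the project's actual configuration.
def test1(X_train, y_train, X_test, y_test):
    import matplotlib.pyplot as plt
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import f1_score

    # Fit a stand-in classifier on the balanced training data.
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X_train, y_train.ravel())

    p = clf.predict(X_test)
    print("f1", f1_score(y_test.ravel(), p, average='macro'))

    # Overlay truth and prediction; main() calls plt.show() once at the end.
    plt.figure()
    plt.plot(y_test.ravel())
    plt.plot(p)

    return clf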
def test1():
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.manifold import TSNE

    icao = 'RJOT'
    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))

    fets = skyds.get_init_features()
    target = skyds.get_init_target()
    data = data[fets + target]

    spdata = skyds.convert.split_time_series(data, data['month'], date_fmt='%m')
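    # The function stops here in the original; StandardScaler and TSNE are
    # imported but never used. A sketch of the assumed intent: standardize
    # each monthly split and project it to 2-D with t-SNE for visual
    # inspection (assumes `split_time_series` returns a dict of DataFrames).
    for key, block in spdata.items():
        emb = TSNE(n_components=2).fit_transform(
            StandardScaler().fit_transform(block[fets]))
        plt.figure()
        plt.scatter(emb[:, 0], emb[:, 1], c=block[target[0]], s=5)
        plt.title(key)
    plt.show()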
def predict_by_period(X, clfs, icao, smooth=False, confidence=False):
    import copy
    import pandas as pd
    import skynet.datasets as skyds
    import skynet.testing as skytest
    from sklearn.preprocessing import StandardScaler
    from skynet import DATA_DIR
    from skynet.nwp2d import NWPFrame
    from skynet.mlcore.feature_selection.filter import pearson_correlation

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    # Prepare the training data (used here only for feature selection).
    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    spX_train, spy_train = skytest.preprocessing.normal(train)

    pred = {}
    for i_term, key in enumerate(X):
        fets = pearson_correlation(spX_train[key], spy_train[key], depth=30)

        ss = StandardScaler()
        x = X[key][fets]
        x = NWPFrame(ss.fit_transform(x), columns=x.keys())

        # Run the per-period model. `predict`, `W` and `adapt_visibility`
        # are module-level names defined elsewhere in the project.
        if confidence:
            p, c = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = NWPFrame(copy.deepcopy(X[key][["date"]]))
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            c["visibility_rank"] = p
            pred[key] = pd.concat([pred[key], c], axis=1)
            pred[key].index = NWPFrame(pred[key]).strtime_to_datetime(
                'date', fmt='%Y-%m-%d %H:%M')
        else:
            p = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = copy.deepcopy(X[key][["date"]])
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            pred[key]["visibility_rank"] = p
            pred[key].index = pred[key].strtime_to_datetime(
                'date', fmt='%Y%m%d%H%M')

    return pred
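
# A hypothetical sketch of `adapt_visibility`, inferred only from its call
# sites (rank predictions in, a 'visibility' column out; called both as
# adapt_visibility(p, 0, 8) and adapt_visibility(p)). The clipping bounds
# and the rank-to-metres table below are invented for illustration and are
# not the project's actual mapping.
def adapt_visibility(p, vmin=0, vmax=8):
    import numpy as np
    # Clip predicted ranks into the valid range, then map each rank to a
    # representative visibility distance in metres (illustrative values).
    rank_to_vis = np.array([100, 200, 400, 800, 1600, 3200, 5000, 8000, 10000])
    r = np.clip(np.asarray(p).astype(int), vmin, vmax)
    return rank_to_vis[r]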
def test1():
    import pickle
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    import skynet.testing as skytest
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler

    icao = 'RJOT'

    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis.dev' % icao
    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    clfs = pickle.load(open('%s/%s.pkl' % (model_dir, model_name), 'rb'))
    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    spX, spy = skytest.preprocessing.test1(data)
    print(spX)

    for key, clf in clfs.items():
        # Bug fix: the original indexed an undefined `spdata`; use the
        # per-period splits returned by the preprocessing step instead.
        X = spX[key]
        y = spy[key]

        ss = StandardScaler()
        X = ss.fit_transform(X)
        y = y.values

        p = clf.predict(X)

        plt.figure()
        plt.plot(y)
        plt.plot(p)

    plt.show()
def main():
    import pickle
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler

    icao = 'RJCC'

    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao
    data_dir = '%s/skynet' % DATA_DIR
    data_name = 'test_%s' % icao

    clfs = pickle.load(open('%s/%s.pkl' % (model_dir, model_name), 'rb'))
    test = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))

    fets = skyds.get_init_features()
    target = skyds.get_init_target()
    test = test[fets + target]

    sptest = skyds.convert.split_time_series(test, test['month'], date_fmt='%m')

    for key, clf in clfs.items():
        X = sptest[key].iloc[:, :-1]
        y = sptest[key].iloc[:, -1]

        ss = StandardScaler()
        X = ss.fit_transform(X)
        y = y.values

        p = clf.predict(X)

        plt.figure()
        plt.plot(y)
        plt.plot(p)

    plt.show()
def Vis_Pred(model, contxt, lclid, test_dir, input_dir, fit_dir, pred_dir, errfile):
    import os
    import sys
    import copy
    import csv
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from pathlib import Path

    myname = sys.argv[0]
    print(model)

    csv_test = '%s/%s-%s.csv' % (test_dir, contxt, lclid)
    csv_input = '%s/%s-%s.vis.csv' % (input_dir, contxt, lclid)
    fitfile = '%s/%s-%s.vis.pkl' % (fit_dir, contxt, lclid)
    predfile = '%s/%s-%s.vis.csv' % (pred_dir, contxt, lclid)
    conffile = '%s/confidence_factor/%s-%s.vis.csv' % (pred_dir, contxt, lclid)

    if not os.path.exists(csv_test):
        print("{:s}: [Error] {:s} is not found !".format(myname, csv_test))
        if not os.path.exists(errfile):
            Path(errfile).touch()
        return

    X = pd.read_csv(csv_test)
    X = npd.NWPFrame(X)

    # --- Reading fitting file & input file (if either is missing -> -9999.)
    if not os.path.exists(fitfile) or not os.path.exists(csv_input):
        print("{:s}: [Checked] {:s} or {:s} is not found !".format(
            myname, fitfile, csv_input))

        PRED = [-9999.] * len(X)

        # - Output (all -9999.)
        outdata = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
        outdata['SKYNET-VIS'] = PRED
        # Bug fix: the original listed 'ARC-GUSTS' here, a column that is
        # never created (an apparent copy-paste from the gusts script);
        # write the 'SKYNET-VIS' column that was just filled in.
        outdata.to_csv(
            predfile,
            columns=['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'SKYNET-VIS'],
            index=False, header=True
        )

        # - Output (num of train -> 0)
        f = open(predfile, 'a')
        csv.writer(f, lineterminator='\n').writerow(['FOOT:TRAIN_NUM', 0])
        f.close()
        return

    df_date = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]

    # Build a single 'YYYYmmddHHMM' date column from the separate fields.
    date_keys = ['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'MIN']
    X['MIN'] = [0] * len(X)
    for key in date_keys:
        if not key == 'HEAD:YEAR':
            X[key] = ['%02d' % int(d) for d in X[key]]
    X.merge_strcol(date_keys, 'date', inplace=True)
    X.drop(date_keys, axis=1, inplace=True)
    # print(X)

    # Rename WNI feature codes to the long feature names.
    wni_code = skyds.get_init_features('wni')
    X = X[wni_code]
    long_code = skyds.get_init_features('long')
    X.columns = long_code

    vt = len(X)

    # Pool the forecast with archived input data from the same two-month
    # period so the scaler is fitted on a larger sample, then keep only the
    # first `vt` rows (the forecast itself).
    pool = skyds.read_csv(csv_input)[long_code]
    sppool = skyds.convert.split_time_series(
        pool, date=pool["date"].values, level="month", period=2, index_date=True)
    month_key_info = get_month_key(X['date'][0], period=2)
    X = pd.concat([X, sppool[month_key_info[1]]])

    ss = StandardScaler()
    X = npd.NWPFrame(ss.fit_transform(X), columns=X.keys())
    X = X.iloc[:vt]

    clfs = pickle.load(open(fitfile, 'rb'))[month_key_info[1]]
    p, c = predict(X, clfs, W[lclid][month_key_info[0]], smooth=False, confidence=True)
    vis_pred = adapt_visibility(p)

    vis = npd.NWPFrame(copy.deepcopy(df_date))
    vis['SKYNET-VIS'] = vis_pred
    # vis.rename(columns={'HEAD:YEAR': 'YEAR'}, inplace=True)

    c = pd.concat([copy.deepcopy(df_date), c], axis=1)
    # c.rename(columns={'HEAD:YEAR': 'YEAR'}, inplace=True)

    print(os.path.dirname(predfile))
    vis.to_csv(predfile, index=False)
    c.to_csv(conffile, index=False)
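
# `get_month_key` is defined elsewhere in the project. A sketch matching its
# use in Vis_Pred: map a 'YYYYmmddHHMM' date string to the index and key of
# its bimonthly term, e.g. get_month_key('201803151200', period=2)
# -> (1, 'month:3-4'). The key format follows the month_keys lists used
# throughout these scripts.
def get_month_key(date_str, period=2):
    month = int(str(date_str)[4:6])
    i_term = (month - 1) // period
    lo = i_term * period + 1
    return i_term, 'month:%d-%d' % (lo, lo + period - 1)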
def main():
    import os
    import matplotlib.pyplot as plt
    import pandas as pd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from skynet import USER_DIR, DATA_DIR
    from skynet.datasets import convert

    # Number of classifiers to train for each two-month period.
    n_clfs = [20, 20, 20, 20, 20, 20]

    target = skyds.get_init_target()

    icao = 'RJFK'
    # 'RJSS', 'RJTT', 'ROAH', 'RJOC', 'RJOO', 'RJCH',
    # 'RJFF', 'RJFK', 'RJGG', 'RJNK', 'RJOA', 'RJOT',

    mlalgo = 'stacking'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    test = skyds.read_csv('%s/skynet/test_%s.csv' % (DATA_DIR, icao))

    # Split the data into two-month periods.
    sptrain = convert.split_time_series(train, train['date'], level="month", period=2)
    sptest = convert.split_time_series(test, test['date'], level="month", period=2)

    ss = StandardScaler()

    model_dir = '%s/PycharmProjects/SkyCC/trained_models' % USER_DIR
    period_keys = [
        'month:1-2', 'month:3-4', 'month:5-6',
        'month:7-8', 'month:9-10', 'month:11-12'
    ]
    init_fets = skyds.get_init_features(code='long')

    for i_term, key in enumerate(period_keys):
        os.makedirs('%s/%s/%s/%s' % (model_dir, icao, mlalgo, key), exist_ok=True)

        # fets = pearson_correlation(sptrain[key][init_fets], sptrain[key][target], depth=30)
        fets = init_fets

        X_train = sptrain[key][fets]
        X_train = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.keys())
        y_train = sptrain[key][target]
        X_train, y_train = convert.balanced(X_train, y_train)

        X_test = sptest[key][fets]
        X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.keys())
        y_test = sptest[key][target]

        save_dir = "%s/%s/%s/%s" % (model_dir, icao, mlalgo, key)
        p_n, score = fit_n_models(
            mlalgo, n_clfs[i_term], X_train, y_train, X_test, y_test, save_dir)

        # Average predictions and scores over the n trained models.
        p = p_n.mean(axis=1)
        score = score.mean()
        print("f1 mean", score)

        plt.figure()
        plt.plot(y_test)
        plt.plot(p)

    plt.show()
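
# `fit_n_models` is defined elsewhere. A sketch consistent with how it is
# called above: train `n` models, pickle each into `save_dir`, and return a
# DataFrame of per-model test predictions plus an array of per-model f1
# scores. Using a random forest as a stand-in for the project's 'stacking'
# learner, and the 'model%d.pkl' file names, are assumptions made for
# illustration.
def fit_n_models(mlalgo, n, X_train, y_train, X_test, y_test, save_dir):
    import pickle
    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import f1_score

    p_n = pd.DataFrame()
    scores = []
    for i in range(n):
        # Vary the seed so the n models differ.
        clf = RandomForestClassifier(n_estimators=100, random_state=i)
        clf.fit(X_train.values, y_train.values.ravel())

        p = clf.predict(X_test.values)
        p_n['model:%d' % i] = p
        scores.append(f1_score(y_test.values.ravel(), p, average='macro'))

        pickle.dump(clf, open('%s/model%d.pkl' % (save_dir, i), 'wb'))

    return p_n, np.array(scores)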
def main():
    import numpy as np
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import f1_score

    icao = "RJAA"

    train_data_dir = '%s/MSM/airport.process' % DATA_DIR
    test_data_dir = '%s/skynet' % DATA_DIR

    train = skyds.read_csv('%s/%s.csv' % (train_data_dir, icao))
    test = skyds.read_pkl('%s/test_%s.pkl' % (test_data_dir, icao))

    # Normalize the test date column and split it into date-part columns.
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], r'[-\s:]'
    )[['month', 'day', 'hour', 'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)

    fs = skyds.get_init_features()
    target = skyds.get_init_target()
    train = train[fs + target]
    test = test[fs + target]

    # Keep only January and February.
    train = train[(train['month'] == 1) | (train['month'] == 2)]
    test = test[(test['month'] == 1) | (test['month'] == 2)]

    X = train.iloc[:, :-1]
    y = train.iloc[:, -1]

    ss = StandardScaler()
    X = ss.fit_transform(X)
    y = y.values

    X, y = skyds.convert.balanced(X, y)

    # Hold out block 0 for testing and train on the remaining folds.
    # (Bug fix: the original re-split the data via an undefined
    # `preprocess.split`; the blocks from `split_blocks` are used directly.)
    spX, spy = skyds.convert.split_blocks(X, y, n_folds=5)
    print(spX)
    X = pd.concat([spX[n] for n in spX if n != 0]).reset_index(drop=True)
    y = pd.concat([spy[n] for n in spy if n != 0]).reset_index(drop=True)
    X_test = spX[0].reset_index(drop=True)
    y_test = spy[0].reset_index(drop=True)

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from mlxtend.classifier import StackingClassifier
    # SkySVM and SkyStacking are project classes from skynet.mlcore; their
    # imports are not shown in the original.
    clf1 = RandomForestClassifier(max_features=2)
    clf2 = SkySVM()
    meta = LogisticRegression()

    # Training
    # (Note) not balanced
    sta = SkyStacking((clf1, clf2), meta)
    sta.fit(X, y)
    p = sta.predict(X_test)

    # Plain random forest baseline; print features by importance.
    clf1.fit(X.values, y.values[:, 0])
    print(np.array(X.keys())[np.argsort(clf1.feature_importances_)[::-1]])
    p_rf = clf1.predict(X_test.values)

    # Stacking with mlxtend, for comparison.
    sc = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    sc.fit(X.values, y.values[:, 0])
    p_sc = sc.predict(X_test.values)

    # Binarize: visibility rank <= 1 is the positive (low-visibility) class.
    y_test = np.where(y_test.values[:, 0] > 1, 0, 1)
    p = np.where(p > 1, 0, 1)
    p_rf = np.where(p_rf > 1, 0, 1)
    p_sc = np.where(p_sc > 1, 0, 1)

    f1 = f1_score(y_true=y_test, y_pred=p)
    print("stacking", f1)

    f1_rf = f1_score(y_true=y_test, y_pred=p_rf)
    print("random forest", f1_rf)

    f1_sc = f1_score(y_true=y_test, y_pred=p_sc)
    print("stacked classifier", f1_sc)
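
# A minimal sketch of the stacking scheme `SkyStacking` is assumed to
# implement, inferred from its use above (fit the base models, then fit the
# meta-learner on their predictions). The real class likely differs in
# detail; in particular, a production implementation would train the
# meta-learner on out-of-fold base predictions to avoid leakage, which this
# sketch skips for brevity.
class SkyStacking(object):
    def __init__(self, clfs, meta):
        self.clfs = clfs
        self.meta = meta

    def fit(self, X, y):
        import numpy as np
        X, y = np.asarray(X), np.asarray(y).ravel()
        # Train each base model, then the meta-model on their outputs.
        Z = np.column_stack([clf.fit(X, y).predict(X) for clf in self.clfs])
        self.meta.fit(Z, y)
        return self

    def predict(self, X):
        import numpy as np
        X = np.asarray(X)
        # Stack the base predictions and let the meta-model decide.
        Z = np.column_stack([clf.predict(X) for clf in self.clfs])
        return self.meta.predict(Z)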