Example #1
0
def main():
    """Load METAR visibility data for RJCC and prepend split date columns.

    Reads the station CSV, normalizes the 'date' column from '%Y%m%d%H%M'
    to '%Y-%m-%d %H:%M', splits it into year/month/day/hour/min columns,
    and concatenates those in front of the original frame.
    """
    import pandas as pd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from skynet.nwp2d import NWPFrame

    icao = 'RJCC'
    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))

    data = NWPFrame(data)
    # Round-trip through datetime to get a uniformly formatted date string.
    data.strtime_to_datetime(date_key='date', fmt='%Y%m%d%H%M', inplace=True)
    data.datetime_to_strtime(date_key='date',
                             fmt='%Y-%m-%d %H:%M',
                             inplace=True)
    # Split 'YYYY-MM-DD HH:MM' on '-', ':' and space into separate columns.
    df_date = data.split_strcol('date',
                                ['year', 'month', 'day', 'hour', 'min'],
                                pattern='[-: ]')
    df_date = df_date[['year', 'month', 'day', 'hour', 'min']]

    data = pd.concat([df_date, data], axis=1)
    print(data)
Example #2
0
def main():
    """Train per-two-month visibility classifiers for one airport and save them.

    For each two-month period, selects the 30 features most correlated with
    the target, balances the training set, fits a model via the module-level
    ``test1`` helper, and pickles the resulting classifier dict.
    """
    import pickle
    import matplotlib.pyplot as plt
    import pandas as pd
    import skynet.datasets as skyds
    import skynet.testing as skytest
    from skynet import DATA_DIR
    from skynet.mlcore.feature_selection.filter import pearson_correlation

    icao = 'RJAA'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao
    data_name = 'GLOBAL_METAR-%s.vis' % icao
    month_keys = [
        'month:1-2', 'month:3-4', 'month:5-6', 'month:7-8', 'month:9-10',
        'month:11-12'
    ]

    # Prepare the training data, split into per-period feature/target dicts.
    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    spX_train, spy_train = skytest.preprocessing.normal(train)

    # Prepare the test data the same way.
    test = pd.read_csv(
        '/Users/makino/PycharmProjects/SkyCC/data/skynet/test_%s.csv' % icao,
        sep=',')
    spX_test, spy_test = skytest.preprocessing.normal(test)

    clfs = {}
    for key in month_keys:
        # Keep only the 30 features most correlated with the target.
        fets = pearson_correlation(spX_train[key], spy_train[key], depth=30)

        X_train, y_train = spX_train[key][fets].values, spy_train[key].values
        X_train, y_train = skyds.convert.balanced(X_train, y_train)
        X_test, y_test = spX_test[key][fets].values, spy_test[key].values

        # test1 is defined elsewhere in this module; it fits and returns a model.
        clfs[key] = test1(X_train, y_train, X_test, y_test)

    # Close the file deterministically instead of leaking the handle.
    with open('%s/%s.pkl' % (model_dir, model_name), 'wb') as f:
        pickle.dump(clfs, f)

    plt.show()
Example #3
0
def test1():
    """Load the RJOT visibility feature/target data and split it by month.

    Returns the month-keyed dict of sub-frames produced by
    ``split_time_series`` (the original computed and discarded it).
    """
    import skynet.datasets as skyds
    from skynet import DATA_DIR

    icao = 'RJOT'
    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    fets = skyds.get_init_features()
    target = skyds.get_init_target()

    data = data[fets + target]
    spdata = skyds.convert.split_time_series(data,
                                             data['month'],
                                             date_fmt='%m')
    # Return the split so callers can actually use the result.
    return spdata
Example #4
0
def predict_by_period(X, clfs, icao, smooth=False, confidence=False):
    """Predict visibility for each period with the matching classifier.

    Parameters
    ----------
    X : dict
        Period key -> feature frame containing a 'date' column plus features.
    clfs : dict
        Period key -> fitted classifier(s) passed to ``predict``.
    icao : str
        Airport code; selects the weight row from the module-level ``W``.
    smooth : bool
        Forwarded to ``predict``.
    confidence : bool
        When True, ``predict`` also returns confidence factors, which are
        appended to the output frame.

    Returns
    -------
    dict
        Period key -> prediction frame indexed by datetime.
    """
    # Bug fix: ``copy`` and ``pandas`` are used below but were never
    # imported here, unlike every other local dependency.
    import copy
    import pandas as pd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    import skynet.testing as skytest
    from skynet import DATA_DIR
    from skynet.nwp2d import NWPFrame
    from skynet.mlcore.feature_selection.filter import pearson_correlation

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    # Training data is loaded only to drive per-period feature selection.
    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    spX_train, spy_train = skytest.preprocessing.normal(train)

    pred = {}
    for i_term, key in enumerate(X):
        fets = pearson_correlation(spX_train[key], spy_train[key], depth=30)

        ss = StandardScaler()
        x = X[key][fets]
        x = NWPFrame(ss.fit_transform(x), columns=x.keys())

        # predict / W / adapt_visibility are module-level names defined
        # elsewhere in this file.
        if confidence:
            p, c = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = NWPFrame(copy.deepcopy(X[key][["date"]]))
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            c["visibility_rank"] = p
            pred[key] = pd.concat([pred[key], c], axis=1)
            pred[key].index = NWPFrame(pred[key]).strtime_to_datetime(
                'date', fmt='%Y-%m-%d %H:%M')
        else:
            p = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = copy.deepcopy(X[key][["date"]])
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            pred[key]["visibility_rank"] = p
            pred[key].index = pred[key].strtime_to_datetime('date',
                                                            fmt='%Y%m%d%H%M')

    return pred
Example #5
0
def test1():
    """Evaluate saved per-period classifiers on the RJOT data set and plot."""
    import pickle
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    import skynet.testing as skytest
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler

    icao = 'RJOT'
    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis.dev' % icao
    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao

    # Close the pickle file deterministically instead of leaking the handle.
    with open('%s/%s.pkl' % (model_dir, model_name), 'rb') as f:
        clfs = pickle.load(f)

    data = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))

    spX, spy = skytest.preprocessing.test1(data)

    print(spX)

    for key, clf in clfs.items():
        # Bug fix: the original indexed an undefined name ``spdata``; the
        # per-period features/targets come from the split dicts above.
        X = spX[key]
        y = spy[key]

        ss = StandardScaler()

        X = ss.fit_transform(X)
        y = y.values

        p = clf.predict(X)

        plt.figure()
        plt.plot(y)
        plt.plot(p)
    plt.show()
Example #6
0
def main():
    """Run saved RJCC classifiers on the hold-out test CSV and plot results."""
    import pickle
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler

    icao = 'RJCC'
    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao
    data_dir = '%s/skynet' % DATA_DIR
    data_name = 'test_%s' % icao

    # Close the pickle file deterministically instead of leaking the handle.
    with open('%s/%s.pkl' % (model_dir, model_name), 'rb') as f:
        clfs = pickle.load(f)

    test = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    fets = skyds.get_init_features()
    target = skyds.get_init_target()

    test = test[fets + target]
    sptest = skyds.convert.split_time_series(test, test['month'], date_fmt='%m')

    for key, clf in clfs.items():
        # Last column is the target; everything before it is a feature.
        X = sptest[key].iloc[:, :-1]
        y = sptest[key].iloc[:, -1]

        ss = StandardScaler()

        X = ss.fit_transform(X)
        y = y.values

        p = clf.predict(X)

        plt.figure()
        plt.plot(y)
        plt.plot(p)
    plt.show()
Example #7
0
def Vis_Pred(model, contxt, lclid, test_dir, input_dir, fit_dir, pred_dir,
             errfile):
    """Predict visibility for one location and write prediction/confidence CSVs.

    Reads the test CSV for ``lclid``. If the test CSV is missing, touches
    ``errfile`` and returns. If the fitted model or the training input CSV
    is missing, writes a prediction file of -9999. sentinel values instead.
    Otherwise standardizes the features together with the matching
    two-month training pool, runs the module-level ``predict``, and writes
    the visibility and confidence-factor CSVs.
    """
    import os
    import sys
    import copy
    import csv
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from pathlib import Path

    myname = sys.argv[0]

    print(model)

    csv_test = '%s/%s-%s.csv' % (test_dir, contxt, lclid)
    csv_input = '%s/%s-%s.vis.csv' % (input_dir, contxt, lclid)
    fitfile = '%s/%s-%s.vis.pkl' % (fit_dir, contxt, lclid)
    predfile = '%s/%s-%s.vis.csv' % (pred_dir, contxt, lclid)
    conffile = '%s/confidence_factor/%s-%s.vis.csv' % (pred_dir, contxt, lclid)

    if not os.path.exists(csv_test):
        print("{:s}: [Error] {:s} is not found !".format(myname, csv_test))

        if not os.path.exists(errfile):
            Path(errfile).touch()

        return

    X = pd.read_csv(csv_test)
    X = npd.NWPFrame(X)

    # --- Reading Fitting File & Input File (If Not Existing -> -9999.)
    if not os.path.exists(fitfile) or not os.path.exists(csv_input):
        print("{:s}: [Checked] {:s} or {:s} is not found !".format(
            myname, fitfile, csv_input))
        # One sentinel value per test row (replaces the O(n) append loop).
        PRED = [-9999.] * len(X)

        # - Output(all -9999.)
        outdata = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
        outdata['SKYNET-VIS'] = PRED
        # Bug fix: the frame's prediction column is 'SKYNET-VIS'; the
        # original listed 'ARC-GUSTS', which is absent and raises KeyError.
        outdata.to_csv(
            predfile,
            columns=['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'SKYNET-VIS'],
            index=False,
            header=True)

        # - Output(num of train -> 0)
        with open(predfile, 'a') as f:
            csv.writer(f, lineterminator='\n').writerow(['FOOT:TRAIN_NUM', 0])
        return

    df_date = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
    date_keys = ['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'MIN']
    X['MIN'] = [0] * len(X)
    # Zero-pad all but the year so the merged date string parses consistently.
    for key in date_keys:
        if not key == 'HEAD:YEAR':
            X[key] = ['%02d' % int(d) for d in X[key]]

    X.merge_strcol(date_keys, 'date', inplace=True)
    X.drop(date_keys, axis=1, inplace=True)

    # Select WNI feature codes, then relabel them with their long names.
    wni_code = skyds.get_init_features('wni')
    X = X[wni_code]

    long_code = skyds.get_init_features('long')
    X.columns = long_code

    vt = len(X)

    # Append the matching two-month training pool so scaling statistics are
    # computed over train + test together; only the test rows are kept after.
    pool = skyds.read_csv(csv_input)[long_code]
    sppool = skyds.convert.split_time_series(pool,
                                             date=pool["date"].values,
                                             level="month",
                                             period=2,
                                             index_date=True)

    # get_month_key is a module-level helper defined elsewhere in this file.
    month_key_info = get_month_key(X['date'][0], period=2)
    X = pd.concat([X, sppool[month_key_info[1]]])

    ss = StandardScaler()
    X = npd.NWPFrame(ss.fit_transform(X), columns=X.keys())
    X = X.iloc[:vt]

    # Close the pickle file deterministically instead of leaking the handle.
    with open(fitfile, 'rb') as f:
        clfs = pickle.load(f)[month_key_info[1]]

    # predict / W / adapt_visibility are module-level names defined elsewhere.
    p, c = predict(X,
                   clfs,
                   W[lclid][month_key_info[0]],
                   smooth=False,
                   confidence=True)

    vis_pred = adapt_visibility(p)
    vis = npd.NWPFrame(copy.deepcopy(df_date))
    vis['SKYNET-VIS'] = vis_pred
    c = pd.concat([copy.deepcopy(df_date), c], axis=1)

    print(os.path.dirname(predfile))

    vis.to_csv(predfile, index=False)
    c.to_csv(conffile, index=False)
Example #8
0
def main():
    """Fit stacking ensembles per two-month period for one airport and plot.

    For each period, standardizes train/test features, balances the training
    set, fits ``n_clfs`` models via the module-level ``fit_n_models`` helper,
    and plots the mean prediction against the test targets.
    """
    # Bug fix: ``os`` and ``pandas`` are used below but were never imported
    # here, unlike every other local dependency.
    import os
    import matplotlib.pyplot as plt
    import pandas as pd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from skynet import USER_DIR, DATA_DIR
    from skynet.datasets import convert

    # Number of base models fitted per period (one entry per period key).
    n_clfs = [20, 20, 20, 20, 20, 20]

    target = skyds.get_init_target()

    icao = 'RJFK'

    mlalgo = 'stacking'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao
    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    test = skyds.read_csv('%s/skynet/test_%s.csv' % (DATA_DIR, icao))

    # Split the data by two-month period.
    sptrain = convert.split_time_series(train, train['date'], level="month", period=2)
    sptest = convert.split_time_series(test, test['date'], level="month", period=2)

    ss = StandardScaler()
    model_dir = '%s/PycharmProjects/SkyCC/trained_models' % USER_DIR

    period_keys = [
        'month:1-2',
        'month:3-4',
        'month:5-6',
        'month:7-8',
        'month:9-10',
        'month:11-12'
    ]

    init_fets = skyds.get_init_features(code='long')
    for i_term, key in enumerate(period_keys):
        os.makedirs(
            '%s/%s/%s/%s'
            % (model_dir, icao, mlalgo, key), exist_ok=True
        )

        fets = init_fets

        X_train = sptrain[key][fets]
        X_train = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.keys())
        y_train = sptrain[key][target]
        X_train, y_train = convert.balanced(X_train, y_train)

        X_test = sptest[key][fets]
        X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.keys())
        y_test = sptest[key][target]

        save_dir = "%s/%s/%s/%s" % (model_dir, icao, mlalgo, key)
        # fit_n_models is defined elsewhere in this module.
        p_n, score = fit_n_models(mlalgo, n_clfs[i_term], X_train, y_train, X_test, y_test, save_dir)

        # Ensemble prediction: mean over the n fitted models.
        p = p_n.mean(axis=1)
        score = score.mean()
        print("f1 mean", score)

        plt.figure()
        plt.plot(y_test)
        plt.plot(p)
    plt.show()
Example #9
0
def main():
    """Compare SkyStacking, random forest, and mlxtend stacking on RJAA data.

    Trains on January/February data, holds out fold 0 as the test block,
    and prints the binarized F1 score of each of the three classifiers.
    """
    # Bug fix: ``numpy`` and ``pandas`` are used below but were never
    # imported here, unlike the other local dependencies.
    import numpy as np
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import f1_score

    icao = "RJAA"

    train_data_dir = '%s/MSM/airport.process' % DATA_DIR
    test_data_dir = '%s/skynet' % DATA_DIR

    train = skyds.read_csv('%s/%s.csv' % (train_data_dir, icao))
    test = skyds.read_pkl('%s/test_%s.pkl' % (test_data_dir, icao))

    # Normalize the test 'date' column and split it into date-part columns.
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], r'[-\s:]'
    )[['month', 'day', 'hour', 'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)

    fs = skyds.get_init_features()
    target = skyds.get_init_target()

    train = train[fs + target]
    test = test[fs + target]

    # Keep January/February only.
    train = train[(train['month'] == 1) | (train['month'] == 2)]
    test = test[(test['month'] == 1) | (test['month'] == 2)]

    X = train.iloc[:, :-1]
    y = train.iloc[:, -1]

    ss = StandardScaler()
    X = ss.fit_transform(X)
    y = y.values

    X, y = skyds.convert.balanced(X, y)

    spX, spy = skyds.convert.split_blocks(X, y, n_folds=5)
    print(spX)

    # NOTE(review): preprocess, SkySVM, LogisticRegression, SkyStacking and
    # StackingClassifier are assumed to be module-level names defined
    # elsewhere in this file — confirm against the full module.
    spX, spy = preprocess.split(X, y, n_folds=5)
    # Fold 0 is held out as the test block; the rest form the training set.
    X = pd.concat([spX[n] for n in spX if n != 0]).reset_index(drop=True)
    y = pd.concat([spy[n] for n in spy if n != 0]).reset_index(drop=True)

    X_test = spX[0].reset_index(drop=True)
    y_test = spy[0].reset_index(drop=True)

    from sklearn.ensemble import RandomForestClassifier
    clf1 = RandomForestClassifier(max_features=2)
    clf2 = SkySVM()
    meta = LogisticRegression()

    # Fit the custom stacking ensemble.
    # NOTE: the training folds are not re-balanced here.
    sta = SkyStacking((clf1, clf2), meta)
    sta.fit(X, y)
    p = sta.predict(X_test)

    clf1.fit(X.values, y.values[:, 0])
    # Print features sorted by descending random-forest importance.
    print(np.array(X.keys())[np.argsort(clf1.feature_importances_)[::-1]])
    p_rf = clf1.predict(X_test.values)

    # mlxtend's stacking implementation for comparison.
    sc = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    sc.fit(X.values, y.values[:, 0])
    p_sc = sc.predict(X_test.values)

    # Binarize: rank <= 1 -> positive class (1), everything above -> 0.
    y_test = np.where(y_test.values[:, 0] > 1, 0, 1)
    p = np.where(p > 1, 0, 1)
    p_rf = np.where(p_rf > 1, 0, 1)
    p_sc = np.where(p_sc > 1, 0, 1)

    f1 = f1_score(y_true=y_test, y_pred=p)
    print("stacking", f1)

    f1_rf = f1_score(y_true=y_test, y_pred=p_rf)
    print("random forest", f1_rf)

    f1_sc = f1_score(y_true=y_test, y_pred=p_sc)
    print("stacked classifier", f1_sc)