コード例 #1
0
def test1(icao='RJAA', start_month=1, end_month=2):
    """Build a standardized test matrix for *icao* restricted to two months.

    Loads the pickled test frame, expands 'date' into month/day/hour/min
    integer columns, selects the canonical feature/target columns, keeps only
    rows whose month equals *start_month* or *end_month*, and standardizes
    the features.

    Returns
    -------
    (X_test, y_test) : scaled feature ndarray and target values.

    Fixes vs. original: ``icao``/``start_month``/``end_month`` were undefined
    free variables (now parameters with defaults), ``StandardScaler`` was
    never imported, and the pickle file handle was leaked.
    """
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler

    # -- Prepare the test data (close the handle promptly).
    with open('/Users/makino/PycharmProjects/SkyCC/data/skynet/test_%s.pkl'
              % icao, 'rb') as f:
        test = pickle.load(f)
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    # Normalize the timestamp, then split it into calendar feature columns.
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol('date',
                                ['year', 'month', 'day', 'hour', 'min'],
                                r'[-\s:]')[['month', 'day', 'hour',
                                            'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)
    keys = skyds.get_init_features() + skyds.get_init_target()
    test = test[keys]

    # Last column is the target; everything before it is a feature.
    X_test = test.iloc[:, :-1]
    y_test = test.iloc[:, -1]

    # Keep only the two requested months.
    X_test = X_test[(X_test['month'] == start_month) |
                    (X_test['month'] == end_month)]
    y_test = y_test.loc[X_test.index]

    ss = StandardScaler()
    X_test = ss.fit_transform(X_test)
    y_test = y_test.values
    return X_test, y_test
コード例 #2
0
def predict_by_period(X, clfs, icao, smooth=False, confidence=False):
    """Run visibility predictions for each time-series period in *X*.

    Parameters
    ----------
    X : dict-like of period-key -> NWPFrame with a 'date' column
    clfs : dict of period-key -> fitted classifier(s)
    icao : airport code indexing the module-level weight table ``W``
    smooth, confidence : flags forwarded to ``predict``

    Returns a dict mapping each period key to a frame holding the raw class
    prediction ('visibility_rank') and the mapped visibility ('visibility');
    when *confidence* is true the confidence columns are appended too.

    Fixes vs. original: ``copy`` and ``pd`` were used without being imported
    here, although every other dependency is imported locally.

    NOTE(review): ``W``, ``predict`` and ``adapt_visibility`` are assumed to
    be module-level names defined elsewhere in this file — confirm.
    """
    import copy  # was missing: copy.deepcopy is used below
    import pandas as pd  # was missing: pd.concat is used below
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from skynet.nwp2d import NWPFrame

    pred = {}
    for i_term, key in enumerate(X):
        ss = StandardScaler()
        x = X[key]
        fets = skyds.get_init_features('long')

        x = x[fets]
        # Standardize; re-wrap because fit_transform returns a bare ndarray.
        x = NWPFrame(ss.fit_transform(x), columns=x.keys())

        if confidence:
            p, c = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = NWPFrame(copy.deepcopy(X[key][["date"]]))
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            c["visibility_rank"] = p
            pred[key] = pd.concat([pred[key], c], axis=1)
        else:
            p = predict(x, clfs[key], W[icao][i_term], smooth, confidence)
            pred[key] = copy.deepcopy(X[key][["date"]])
            pred[key]["visibility"] = adapt_visibility(p, 0, 8)
            pred[key]["visibility_rank"] = p

    return pred
コード例 #3
0
def msm_airport_xy(icao, metar_dir, msm_dir, save_dir):
    """Join METAR observations with MSM forecast features for one airport
    and write the pre-2017 rows out as a training CSV."""
    import re
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds

    # Column names for the 2017 METAR file (the 2015/2016 files carry
    # their own header rows).
    with open('%s/head.txt' % metar_dir, 'r') as f:
        metar_header = f.read().split(sep=',')

    yearly_frames = [
        pd.read_csv('%s/2015/%s.txt' % (metar_dir, icao), sep=','),
        pd.read_csv('%s/2016/%s.txt' % (metar_dir, icao), sep=','),
        pd.read_csv('%s/2017/%s.txt' % (metar_dir, icao),
                    sep=',',
                    names=metar_header),
    ]
    metar = npd.NWPFrame(pd.concat(yearly_frames))

    # Normalize the timestamp format, drop duplicate observations and
    # index the frame by the timestamp string.
    metar.strtime_to_datetime('date', '%Y%m%d%H%M%S', inplace=True)
    metar.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    metar.drop_duplicates('date', inplace=True)
    metar.index = metar['date'].values

    metar = metar[['date', 'visibility', 'str_cloud']]
    metar['visibility_rank'] = skyds.to_visrank(metar['visibility'])

    # MSM forecasts, indexed and sorted by timestamp.
    msm = pd.read_csv('%s/%s.csv' % (msm_dir, icao))
    msm.rename(columns={'Unnamed: 0': 'date'}, inplace=True)
    msm.index = msm['date'].values
    msm.sort_index(inplace=True)

    features = skyds.get_init_features()
    target = skyds.get_init_target()

    # Align forecast features with observed targets on the shared index.
    merged = npd.NWPFrame(pd.concat([msm[features], metar[target]], axis=1))
    merged.dropna(inplace=True)
    merged.strtime_to_datetime('date', '%Y-%m-%d %H:%M', inplace=True)
    merged.datetime_to_strtime('date', '%Y%m%d%H%M', inplace=True)
    merged = merged[features + target]

    # Everything before 2017 becomes training data.
    pre2017 = [d for d in merged.index if not re.match('2017', d)]
    train = npd.NWPFrame(merged.loc[pre2017])
    train['date'] = train.index
    date_cols = train.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'],
        pattern=r'[-\s:]')[['year', 'month', 'day', 'hour', 'min']]
    train = pd.concat([date_cols, train], axis=1)
    train.drop('date', axis=1, inplace=True)
    train.to_csv('%s/%s.csv' % (save_dir, icao), index=False)
コード例 #4
0
ファイル: tsne.py プロジェクト: maki-d-wni/skynet
def test1():
    """Load the visibility training CSV for RJOT and split it into
    per-month series (t-SNE exploration scaffold; the embedding step is
    not part of this snippet)."""
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.manifold import TSNE

    airport = 'RJOT'
    csv_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    csv_name = 'GLOBAL_METAR-%s.vis' % airport

    frame = skyds.read_csv('%s/%s.csv' % (csv_dir, csv_name))
    feature_names = skyds.get_init_features()
    target_names = skyds.get_init_target()

    # Keep only the canonical feature/target columns, then bucket by month.
    frame = frame[feature_names + target_names]
    spdata = skyds.convert.split_time_series(frame,
                                             frame['month'],
                                             date_fmt='%m')
コード例 #5
0
ファイル: preprocessing.py プロジェクト: maki-d-wni/skynet
def normal(data):
    """Standardize the feature columns of *data* and split both features
    and target into per-month series.

    Returns
    -------
    (spX, spy) : dict-like objects keyed by month, holding the scaled
        feature frames and the matching targets.
    """
    import pandas as pd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler

    feature_names = skyds.get_init_features()
    target_names = skyds.get_init_target()

    data = data[feature_names + target_names]
    month_col = data['month']

    # Last column is the target; everything before it is a feature.
    features = data.iloc[:, :-1]
    labels = data.iloc[:, -1]

    scaler = StandardScaler()
    features = pd.DataFrame(scaler.fit_transform(features.values),
                            columns=features.keys())

    sp_x = skyds.convert.split_time_series(features, month_col, date_fmt='%m')
    sp_y = skyds.convert.split_time_series(labels, month_col, date_fmt='%m')

    return sp_x, sp_y
コード例 #6
0
def main():
    """Load per-period visibility classifiers for one airport, score the
    test set month by month, and plot truth vs. prediction.

    Fix vs. original: the model pickle was opened via
    ``pickle.load(open(...))``, leaking the file handle; it is now closed
    with a ``with`` block.
    """
    import pickle
    import matplotlib.pyplot as plt
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler

    icao = 'RJCC'
    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao
    data_dir = '%s/skynet' % DATA_DIR
    data_name = 'test_%s' % icao

    # One fitted classifier per time-series split key.
    with open('%s/%s.pkl' % (model_dir, model_name), 'rb') as f:
        clfs = pickle.load(f)

    test = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    fets = skyds.get_init_features()
    target = skyds.get_init_target()

    test = test[fets + target]
    sptest = skyds.convert.split_time_series(test, test['month'], date_fmt='%m')

    for key, clf in clfs.items():
        # Last column is the target; the rest are features.
        X = sptest[key].iloc[:, :-1]
        y = sptest[key].iloc[:, -1]

        ss = StandardScaler()
        X = ss.fit_transform(X)
        y = y.values

        p = clf.predict(X)

        # One figure per period: observed series vs. predicted series.
        plt.figure()
        plt.plot(y)
        plt.plot(p)
    plt.show()
コード例 #7
0
ファイル: vis.py プロジェクト: maki-d-wni/skynet
def Vis_Pred(model, contxt, lclid, test_dir, input_dir, fit_dir, pred_dir,
             errfile):
    """Predict visibility for one station and write prediction and
    confidence-factor CSVs.

    If the test CSV is missing, touch *errfile* and return. If the fitted
    model or the pooled input CSV is missing, write a prediction file
    filled with the -9999. sentinel plus a zero train-count footer.

    Fixes vs. original:
      * the sentinel output listed a nonexistent 'ARC-GUSTS' column in
        ``to_csv`` (KeyError at runtime) — corrected to 'SKYNET-VIS';
      * the footer-append file and the model pickle are now closed via
        ``with`` instead of being leaked;
      * the -9999. list is built directly instead of element-by-element.

    NOTE(review): ``W``, ``predict``, ``get_month_key`` and
    ``adapt_visibility`` are assumed to be module-level names defined
    elsewhere in this file — confirm.
    """
    import os
    import sys
    import copy
    import csv
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from pathlib import Path

    myname = sys.argv[0]

    print(model)

    csv_test = '%s/%s-%s.csv' % (test_dir, contxt, lclid)
    csv_input = '%s/%s-%s.vis.csv' % (input_dir, contxt, lclid)
    fitfile = '%s/%s-%s.vis.pkl' % (fit_dir, contxt, lclid)
    predfile = '%s/%s-%s.vis.csv' % (pred_dir, contxt, lclid)
    conffile = '%s/confidence_factor/%s-%s.vis.csv' % (pred_dir, contxt, lclid)

    if not os.path.exists(csv_test):
        print("{:s}: [Error] {:s} is not found !".format(myname, csv_test))

        if not os.path.exists(errfile):
            Path(errfile).touch()

        return

    X = npd.NWPFrame(pd.read_csv(csv_test))

    # --- Reading Fitting File & Input File (If Not Existing -> -9999.)
    if not os.path.exists(fitfile) or not os.path.exists(csv_input):
        print("{:s}: [Checked] {:s} or {:s} is not found !".format(
            myname, fitfile, csv_input))
        # One sentinel value per forecast row.
        PRED = [-9999.] * len(X)

        # - Output(all -9999.)
        outdata = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
        outdata['SKYNET-VIS'] = PRED
        # BUG FIX: the original requested an 'ARC-GUSTS' column that does
        # not exist in outdata, which made to_csv raise KeyError.
        outdata.to_csv(
            predfile,
            columns=['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'SKYNET-VIS'],
            index=False,
            header=True)

        # - Output(num of train -> 0)
        with open(predfile, 'a') as f:
            csv.writer(f, lineterminator='\n').writerow(['FOOT:TRAIN_NUM', 0])
        return

    # Build a single 'date' string column from the calendar columns,
    # zero-padding every component except the year.
    df_date = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
    date_keys = ['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'MIN']
    X['MIN'] = [0] * len(X)
    for key in date_keys:
        if not key == 'HEAD:YEAR':
            X[key] = ['%02d' % int(d) for d in X[key]]

    X.merge_strcol(date_keys, 'date', inplace=True)
    X.drop(date_keys, axis=1, inplace=True)

    # Map WNI feature codes onto the long-form names the models expect.
    wni_code = skyds.get_init_features('wni')
    X = X[wni_code]

    long_code = skyds.get_init_features('long')
    X.columns = long_code

    vt = len(X)

    # Pool historical inputs from the same two-month period so the scaler
    # is fitted on more than just the current forecast window.
    pool = skyds.read_csv(csv_input)[long_code]
    sppool = skyds.convert.split_time_series(pool,
                                             date=pool["date"].values,
                                             level="month",
                                             period=2,
                                             index_date=True)

    month_key_info = get_month_key(X['date'][0], period=2)
    X = pd.concat([X, sppool[month_key_info[1]]])

    ss = StandardScaler()
    X = npd.NWPFrame(ss.fit_transform(X), columns=X.keys())
    X = X.iloc[:vt]  # keep only the current forecast rows

    # Per-period classifiers for this station's current two-month window.
    with open(fitfile, 'rb') as f:
        clfs = pickle.load(f)[month_key_info[1]]

    p, c = predict(X,
                   clfs,
                   W[lclid][month_key_info[0]],
                   smooth=False,
                   confidence=True)

    vis_pred = adapt_visibility(p)
    vis = npd.NWPFrame(copy.deepcopy(df_date))
    vis['SKYNET-VIS'] = vis_pred
    c = pd.concat([copy.deepcopy(df_date), c], axis=1)

    print(os.path.dirname(predfile))

    vis.to_csv(predfile, index=False)
    c.to_csv(conffile, index=False)
コード例 #8
0
ファイル: search.2.py プロジェクト: maki-d-wni/skynet
def main():
    """Train ``n_clfs`` models per two-month period for one airport and
    plot test-set truth vs. the mean ensemble prediction.

    Fix vs. original: ``os`` and ``pd`` were used without a visible import
    although every other dependency is imported locally; both are now
    imported inside the function.

    NOTE(review): ``fit_n_models`` is assumed to be a module-level name
    defined elsewhere in this file — confirm.
    """
    import os
    import matplotlib.pyplot as plt
    import pandas as pd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from skynet import USER_DIR, DATA_DIR
    from skynet.datasets import convert

    # Number of classifiers to fit for each of the six two-month periods.
    n_clfs = [20] * 6

    target = skyds.get_init_target()

    icao = 'RJFK'
    # Other airports previously tried:
    # 'RJSS', 'RJTT', 'ROAH', 'RJOC', 'RJOO', 'RJCH',
    # 'RJFF', 'RJGG', 'RJNK', 'RJOA', 'RJOT'

    mlalgo = 'stacking'

    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    data_name = 'GLOBAL_METAR-%s.vis' % icao
    train = skyds.read_csv('%s/%s.csv' % (data_dir, data_name))
    test = skyds.read_csv('%s/skynet/test_%s.csv' % (DATA_DIR, icao))

    # Split both sets into two-month blocks keyed like 'month:1-2'.
    sptrain = convert.split_time_series(train, train['date'], level="month", period=2)
    sptest = convert.split_time_series(test, test['date'], level="month", period=2)

    ss = StandardScaler()
    model_dir = '%s/PycharmProjects/SkyCC/trained_models' % USER_DIR

    period_keys = [
        'month:1-2',
        'month:3-4',
        'month:5-6',
        'month:7-8',
        'month:9-10',
        'month:11-12'
    ]

    init_fets = skyds.get_init_features(code='long')
    for i_term, key in enumerate(period_keys):
        os.makedirs(
            '%s/%s/%s/%s'
            % (model_dir, icao, mlalgo, key), exist_ok=True
        )

        fets = init_fets

        X_train = sptrain[key][fets]
        X_train = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.keys())
        y_train = sptrain[key][target]
        X_train, y_train = convert.balanced(X_train, y_train)

        X_test = sptest[key][fets]
        # NOTE(review): fit_transform on the test split re-fits the scaler
        # on test statistics; ss.transform would reuse the training fit.
        # Kept as-is to preserve the original evaluation behavior.
        X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.keys())
        y_test = sptest[key][target]

        save_dir = "%s/%s/%s/%s" % (model_dir, icao, mlalgo, key)
        p_n, score = fit_n_models(mlalgo, n_clfs[i_term], X_train, y_train, X_test, y_test, save_dir)

        # Ensemble by averaging the n individual model predictions.
        p = p_n.mean(axis=1)
        score = score.mean()
        print("f1 mean", score)

        plt.figure()
        plt.plot(y_test)
        plt.plot(p)
    plt.show()
コード例 #9
0
def main():
    """Compare a custom stacking ensemble against a plain random forest and
    mlxtend's StackingClassifier on January/February data for one airport,
    reporting F1 for each.

    NOTE(review): relies on module-level names not visible in this chunk
    (``pd``, ``np``, ``preprocess``, ``SkySVM``, ``SkyStacking``,
    ``StackingClassifier``, ``LogisticRegression``) — assumed imported
    elsewhere in the file; confirm.
    """
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import f1_score

    icao = "RJAA"

    train_data_dir = '%s/MSM/airport.process' % DATA_DIR
    test_data_dir = '%s/skynet' % DATA_DIR

    train = skyds.read_csv('%s/%s.csv' % (train_data_dir, icao))
    test = skyds.read_pkl('%s/test_%s.pkl' % (test_data_dir, icao))

    # Re-encode the test 'date' column and expand it into calendar features.
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], r'[-\s:]'
    )[['month', 'day', 'hour', 'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)

    fs = skyds.get_init_features()
    target = skyds.get_init_target()

    train = train[fs + target]
    test = test[fs + target]

    # Restrict both sets to January and February.
    train = train[(train['month'] == 1) | (train['month'] == 2)]
    test = test[(test['month'] == 1) | (test['month'] == 2)]

    # Last column is the target; everything before it is a feature.
    X = train.iloc[:, :-1]
    y = train.iloc[:, -1]

    # Standardize features (returns an ndarray), then rebalance the classes.
    ss = StandardScaler()
    X = ss.fit_transform(X)
    y = y.values

    X, y = skyds.convert.balanced(X, y)

    spX, spy = skyds.convert.split_blocks(X, y, n_folds=5)
    print(spX)

    # Fold 0 is held out for testing; the rest are concatenated for training.
    spX, spy = preprocess.split(X, y, n_folds=5)
    X = pd.concat([spX[n] for n in spX if n != 0]).reset_index(drop=True)
    y = pd.concat([spy[n] for n in spy if n != 0]).reset_index(drop=True)

    X_test = spX[0].reset_index(drop=True)
    y_test = spy[0].reset_index(drop=True)

    from sklearn.ensemble import RandomForestClassifier
    clf1 = RandomForestClassifier(max_features=2)
    clf2 = SkySVM()
    meta = LogisticRegression()

    # Training
    # NOTE: the training folds are not re-balanced after the split.
    sta = SkyStacking((clf1, clf2), meta)
    sta.fit(X, y)
    p = sta.predict(X_test)

    clf1.fit(X.values, y.values[:, 0])
    # Feature names ordered by random-forest importance (descending).
    print(np.array(X.keys())[np.argsort(clf1.feature_importances_)[::-1]])
    p_rf = clf1.predict(X_test.values)

    # mlxtend's stacking, for comparison
    sc = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    sc.fit(X.values, y.values[:, 0])
    p_sc = sc.predict(X_test.values)

    # Binarize: rank <= 1 maps to the positive class (1), otherwise 0.
    y_test = np.where(y_test.values[:, 0] > 1, 0, 1)
    p = np.where(p > 1, 0, 1)
    p_rf = np.where(p_rf > 1, 0, 1)
    p_sc = np.where(p_sc > 1, 0, 1)

    f1 = f1_score(y_true=y_test, y_pred=p)
    print("stacking", f1)

    f1_rf = f1_score(y_true=y_test, y_pred=p_rf)
    print("random forest", f1_rf)

    f1_sc = f1_score(y_true=y_test, y_pred=p_sc)
    print("stacked classifier", f1_sc)