Beispiel #1
0
def msm_airport_xy(icao, metar_dir, msm_dir, save_dir):
    """Join METAR observations with MSM features for one airport and save a training CSV.

    Reads 2015-2017 METAR files and the airport's MSM CSV, aligns them on a
    shared date string, keeps the configured feature/target columns, drops
    rows whose date starts with '2017' (presumably held out for testing —
    confirm), and writes the remaining rows, with split date columns
    prepended, to ``save_dir``.

    Args:
        icao: 4-letter ICAO airport code used in the data file names.
        metar_dir: directory holding ``head.txt`` and per-year METAR files.
        msm_dir: directory holding per-airport MSM CSV files.
        save_dir: destination directory for ``<icao>.csv``.
    """
    import re
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds

    # Load METAR data; head.txt holds the comma-separated column names.
    with open('%s/head.txt' % metar_dir, 'r') as f:
        header = f.read()
    header = header.split(sep=',')

    # NOTE(review): 2015/2016 are read with their own header rows while the
    # 2017 file is headerless and takes names from head.txt — presumably the
    # raw file formats differ; confirm against the actual files.
    data15 = pd.read_csv('%s/2015/%s.txt' % (metar_dir, icao), sep=',')
    data16 = pd.read_csv('%s/2016/%s.txt' % (metar_dir, icao), sep=',')
    data17 = pd.read_csv('%s/2017/%s.txt' % (metar_dir, icao),
                         sep=',',
                         names=header)

    metar_data = pd.concat([data15, data16, data17])
    metar_data = npd.NWPFrame(metar_data)

    # Normalize 'date' to 'YYYY-MM-DD HH:MM' strings and index by them.
    metar_data.strtime_to_datetime('date', '%Y%m%d%H%M%S', inplace=True)
    metar_data.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    metar_data.drop_duplicates('date', inplace=True)
    metar_data.index = metar_data['date'].values

    metar_keys = ['date', 'visibility', 'str_cloud']
    metar_data = metar_data[metar_keys]
    metar_data['visibility_rank'] = skyds.to_visrank(metar_data['visibility'])

    # Load MSM model output; the unnamed first column is the date string.
    msm_data = pd.read_csv('%s/%s.csv' % (msm_dir, icao))

    msm_data.rename(columns={'Unnamed: 0': 'date'}, inplace=True)
    msm_data.index = msm_data['date'].values
    msm_data.sort_index(inplace=True)

    # Column-wise join of MSM features with the METAR target on the shared
    # date index; rows missing from either side are dropped.
    fets = skyds.get_init_features()
    target = skyds.get_init_target()
    X = npd.NWPFrame(pd.concat([msm_data[fets], metar_data[target]], axis=1))
    X.dropna(inplace=True)
    X.strtime_to_datetime('date', '%Y-%m-%d %H:%M', inplace=True)
    X.datetime_to_strtime('date', '%Y%m%d%H%M', inplace=True)
    X = X[fets + target]

    # Training rows: every index not beginning with '2017'.
    date = [d for d in X.index if not re.match('2017', d)]
    train = npd.NWPFrame(X.loc[date])
    train['date'] = train.index
    # Split 'YYYY-MM-DD HH:MM' into separate year/month/day/hour/min columns.
    df_date = train.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'],
        pattern=r'[-\s:]')[['year', 'month', 'day', 'hour', 'min']]
    train = pd.concat([df_date, train], axis=1)
    train.drop('date', axis=1, inplace=True)
    train.to_csv('%s/%s.csv' % (save_dir, icao), index=False)
Beispiel #2
0
def eval_one_forecast(metar: pd.DataFrame, pred: pd.DataFrame, save_dir):
    """Compare SKYNET visibility predictions against METAR observations for one airport.

    Aligns ``metar`` and ``pred`` on a 'YYYY-MM-DD HH:MM' date index, writes
    an HTML time series of observed vs. predicted visibility and an HTML
    confusion matrix under ``save_dir``.

    Args:
        metar: observation table with at least 'ICAO', 'date' and
            'visibility' columns.
        pred: prediction table with 'HEAD:YEAR'/'MON'/'DAY'/'HOUR' date parts
            and a 'SKYNET-VIS' column.
        save_dir: output root; subdirectories are created as needed.

    NOTE(review): relies on module-level names ``npd``, ``os``, ``skyds``
    and ``conf_mat`` — confirm they are imported/defined elsewhere.
    """
    icao = metar['ICAO'][0]
    metar.index = metar['date']
    metar.sort_index(inplace=True)
    metar.drop_duplicates('date', inplace=True)

    pred = npd.NWPFrame(pred)
    # Zero-pad the date parts (4 digits for the year, 2 for the rest) so
    # they concatenate into a parseable 'YYYYMMDDHH' string.
    pred_date_cols = ['HEAD:YEAR', 'MON', 'DAY', 'HOUR']
    for key in pred_date_cols:
        if key == 'HEAD:YEAR':
            pred[key] = pred[key].astype(str).str.pad(4, fillchar='0')
        else:
            pred[key] = pred[key].astype(str).str.pad(2, fillchar='0')
    pred.merge_strcol(pred_date_cols, 'date', inplace=True)
    pred.strtime_to_datetime('date', '%Y%m%d%H', inplace=True)
    pred.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    pred.index = pred['date']
    pred.sort_index(inplace=True)
    pred.drop_duplicates('date', inplace=True)

    # Side-by-side observed/predicted visibility; timestamps missing from
    # either side are dropped.
    vis = pd.concat([metar, pred], axis=1)
    vis = vis[['visibility', 'SKYNET-VIS']]
    vis.dropna(inplace=True)
    os.makedirs('%s/time_series' % save_dir, exist_ok=True)
    vis.to_html('%s/time_series/%s.html' % (save_dir, icao))

    # Confusion matrix over the configured visibility level boundaries.
    vis_level = skyds.get_init_vis_level()
    steps = list(vis_level.values())

    cfm = conf_mat(vis['visibility'], vis['SKYNET-VIS'], steps)
    os.makedirs('%s/confusion_matrix' % save_dir, exist_ok=True)
    cfm.to_html('%s/confusion_matrix/%s.html' % (save_dir, icao))
Beispiel #3
0
def test1():
    """Ad-hoc smoke test: load pickled test data for one airport and standardize it.

    NOTE(review): relies on names not defined in this function —
    ``icao``, ``start_month``, ``end_month`` and ``StandardScaler`` must
    exist at module scope; confirm before running.  Results are only bound
    to locals, so this function has no observable effect beyond the work.
    """
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds

    # Prepare the test data.
    test = pickle.load(
        open(
            '/Users/makino/PycharmProjects/SkyCC/data/skynet/test_%s.pkl' %
            icao, 'rb'))
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    # Normalize 'date' to 'YYYY-MM-DD HH:MM', then split it into integer
    # month/day/hour/min columns prepended to the frame.
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol('date',
                                ['year', 'month', 'day', 'hour', 'min'],
                                r'[-\s:]')[['month', 'day', 'hour',
                                            'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)
    keys = skyds.get_init_features() + skyds.get_init_target()
    test = test[keys]

    # Last column is the target; everything before it is a feature.
    X_test = test.iloc[:, :-1]
    y_test = test.iloc[:, -1]

    # Keep only rows from the two selected months.
    X_test = X_test[(X_test['month'] == start_month) |
                    (X_test['month'] == end_month)]
    y_test = y_test.loc[X_test.index]

    ss = StandardScaler()
    X_test = ss.fit_transform(X_test)
    y_test = y_test.values
Beispiel #4
0
def confidence_factor(x, n_class):
    """Count, per sample, how many classifiers voted for each class.

    Each column of ``x`` holds one classifier's integer class predictions
    for all samples; the result is an (n_samples, n_class) vote-count
    table wrapped in an NWPFrame.

    Args:
        x: 2-D array of class predictions, one column per classifier.
        n_class: total number of classes (width of the output table).

    Returns:
        npd.NWPFrame of vote counts.
    """
    import numpy as np
    import skynet.nwp2d as npd

    n_samples = len(x)
    votes = np.zeros((n_samples, n_class))
    rows = np.arange(n_samples)

    # One pass per classifier: bump the voted class of every sample.
    for col in range(x.shape[1]):
        labels = x[:, col].astype(int)
        votes[rows, labels] += 1

    return npd.NWPFrame(votes)
Beispiel #5
0
def set_visibility_area_forecast(icao):
    """Load the area-forecast visibility table for one airport.

    Reads the pickled test frame for ``icao``, keeps the date and VIS
    columns (renaming VIS to 'visibility'), adds a 'visibility_rank'
    column, and indexes the resulting NWPFrame by the parsed 'date'.

    Args:
        icao: 4-letter ICAO airport code.

    Returns:
        npd.NWPFrame with 'date', 'visibility' and 'visibility_rank'
        columns, indexed by datetime.
    """
    import pickle
    import skynet.nwp2d as npd
    from skynet import DATA_DIR

    # Use a context manager so the pickle file handle is closed promptly
    # (the original `pickle.load(open(...))` leaked the handle to the GC).
    with open(DATA_DIR + "/skynet/metar.before.msm/test_%s.pkl" % icao,
              "rb") as f:
        af = pickle.load(f)
    af = af[["date", "VIS"]].rename(columns={"VIS": "visibility"})
    af["visibility_rank"] = convert_visibility_rank(af["visibility"].values)
    af = npd.NWPFrame(af)
    af.index = af.strtime_to_datetime('date', fmt='%Y%m%d%H%M')

    return af
Beispiel #6
0
def set_visibility_metar(icao):
    """Read the METAR visibility CSV for *icao* and return it as an NWPFrame.

    Adds a 'visibility_rank' column derived from 'visibility' and indexes
    the frame by the parsed 'date' column.

    Args:
        icao: 4-letter ICAO airport code.

    Returns:
        npd.NWPFrame indexed by datetime.
    """
    import skynet.nwp2d as npd
    from skynet import DATA_DIR

    df = npd.NWPFrame(
        pd.read_csv(DATA_DIR + "/metar/airport_vis/metar_%s.csv" % icao))

    # Derive the discrete rank from the raw visibility values, then index
    # the frame by its parsed observation time.
    df['visibility_rank'] = convert_visibility_rank(df['visibility'].values)
    df.index = df.strtime_to_datetime('date', fmt='%Y%m%d%H%M')

    return df
Beispiel #7
0
def set_visibility_human_edit(icao):
    """Load the human-edited ('after') visibility records for *icao*.

    The source CSV is headerless; only the date and edited-visibility
    columns are kept.  Returns an NWPFrame whose columns are renamed to
    ['date', 'visibility', 'visibility_rank'] and whose index is the
    parsed 'date'.

    Args:
        icao: 4-letter ICAO airport code.

    Returns:
        npd.NWPFrame indexed by datetime.
    """
    import skynet.nwp2d as npd
    from skynet import DATA_DIR

    raw = pd.read_csv(
        "%s/after/%s.csv" % (DATA_DIR, icao),
        names=["a", "b", "date", "c", "VIS_after", "d", "e", "f", "g", "h", "i"]
    )

    frame = npd.NWPFrame(raw[["date", "VIS_after"]])
    frame.drop_duplicates("date", keep="first", inplace=True)

    # Rank first, then index by parsed date, then normalize column names.
    frame["visibility_rank"] = convert_visibility_rank(
        frame["VIS_after"].values)
    frame.index = frame.strtime_to_datetime('date', fmt='%Y%m%d%H%M')
    frame.columns = ["date", "visibility", "visibility_rank"]

    return frame
Beispiel #8
0
def Vis_Pred(model, contxt, lclid, test_dir, input_dir, fit_dir, pred_dir,
             errfile):
    """Predict visibility ('SKYNET-VIS') for one location and write CSV output.

    Reads the test CSV for ``contxt``/``lclid``.  When both the fitted model
    pickle and the training-input CSV exist, standardizes the test features
    together with the two-month training pool matching the forecast date,
    runs the ensemble, and writes predictions plus per-class confidence
    factors.  When either file is missing, writes a prediction file filled
    with -9999 and a zero train-count footer instead.

    Args:
        model: model identifier (printed for logging only).
        contxt: context id used to build the data file names.
        lclid: location id used to build the data file names.
        test_dir/input_dir/fit_dir/pred_dir: data directories.
        errfile: path touched when the test CSV itself is missing.

    NOTE(review): relies on module-level names ``W``, ``get_month_key``,
    ``predict`` and ``adapt_visibility`` — confirm they are in scope.
    """
    import os
    import sys
    import copy
    import csv
    import pickle
    import pandas as pd
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from sklearn.preprocessing import StandardScaler
    from pathlib import Path

    myname = sys.argv[0]

    print(model)

    csv_test = '%s/%s-%s.csv' % (test_dir, contxt, lclid)
    csv_input = '%s/%s-%s.vis.csv' % (input_dir, contxt, lclid)
    fitfile = '%s/%s-%s.vis.pkl' % (fit_dir, contxt, lclid)
    predfile = '%s/%s-%s.vis.csv' % (pred_dir, contxt, lclid)
    conffile = '%s/confidence_factor/%s-%s.vis.csv' % (pred_dir, contxt, lclid)

    if not os.path.exists(csv_test):
        print("{:s}: [Error] {:s} is not found !".format(myname, csv_test))

        if not os.path.exists(errfile):
            Path(errfile).touch()

        return

    X = pd.read_csv(csv_test)
    X = npd.NWPFrame(X)

    # --- Reading Fitting File & Input File (If Not Existing -> -9999.)
    if not os.path.exists(fitfile) or not os.path.exists(csv_input):
        print("{:s}: [Checked] {:s} or {:s} is not found !".format(
            myname, fitfile, csv_input))
        # One missing-value sentinel per test row.
        PRED = [-9999.] * len(X)

        # - Output(all -9999.)
        # .copy() so adding the prediction column does not trigger pandas
        # chained-assignment warnings on a slice of X.
        outdata = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']].copy()
        outdata['SKYNET-VIS'] = PRED
        # BUG FIX: the column list previously named 'ARC-GUSTS' (copied
        # from the gusts script), which is not a column of outdata and made
        # to_csv raise KeyError; the prediction column here is 'SKYNET-VIS'.
        outdata.to_csv(
            predfile,
            columns=['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'SKYNET-VIS'],
            index=False,
            header=True)

        # - Output(num of train -> 0)
        with open(predfile, 'a') as f:
            csv.writer(f, lineterminator='\n').writerow(['FOOT:TRAIN_NUM', 0])
        return

    # Keep the raw date parts for the output files, then build a single
    # 'date' string column (minutes fixed to 0, parts zero-padded).
    df_date = X[['HEAD:YEAR', 'MON', 'DAY', 'HOUR']]
    date_keys = ['HEAD:YEAR', 'MON', 'DAY', 'HOUR', 'MIN']
    X['MIN'] = [0] * len(X)
    for key in date_keys:
        if not key == 'HEAD:YEAR':
            X[key] = ['%02d' % int(d) for d in X[key]]

    X.merge_strcol(date_keys, 'date', inplace=True)
    X.drop(date_keys, axis=1, inplace=True)

    # Select features by WNI code, then relabel with the long names the
    # model was trained on.
    wni_code = skyds.get_init_features('wni')
    X = X[wni_code]

    long_code = skyds.get_init_features('long')
    X.columns = long_code

    vt = len(X)

    # Training pool split into two-month periods; the test rows are
    # standardized together with the pool of the matching period so the
    # scaler sees the training distribution.
    pool = skyds.read_csv(csv_input)[long_code]
    sppool = skyds.convert.split_time_series(pool,
                                             date=pool["date"].values,
                                             level="month",
                                             period=2,
                                             index_date=True)

    month_key_info = get_month_key(X['date'][0], period=2)
    X = pd.concat([X, sppool[month_key_info[1]]])

    ss = StandardScaler()
    X = npd.NWPFrame(ss.fit_transform(X), columns=X.keys())
    X = X.iloc[:vt]  # keep only the (scaled) test rows

    # Load the classifiers fitted for the matching two-month period;
    # context manager closes the pickle handle promptly.
    with open(fitfile, 'rb') as fp:
        clfs = pickle.load(fp)[month_key_info[1]]

    # Ensemble prediction with per-class confidence factors.
    p, c = predict(X,
                   clfs,
                   W[lclid][month_key_info[0]],
                   smooth=False,
                   confidence=True)

    vis_pred = adapt_visibility(p)
    vis = npd.NWPFrame(copy.deepcopy(df_date))
    vis['SKYNET-VIS'] = vis_pred
    c = pd.concat([copy.deepcopy(df_date), c], axis=1)

    print(os.path.dirname(predfile))

    vis.to_csv(predfile, index=False)
    c.to_csv(conffile, index=False)
Beispiel #9
0
def edit_rate_06_23():
    """Compute per-airport forecaster edit rates for validity hours covering 06-23.

    For every airport present in both the 'before' (automatic forecast) and
    'after' (human-edited) directories, aligns the two data sets on validity
    time, discretizes visibility/ceiling onto the reporting scales, derives
    a weather string from the 'before' probabilities, marks each element that
    differs between before and after as an edit, and writes per-airport and
    combined edit-rate CSVs under ``DATA_DIR/evaluate/edit_rate``.

    NOTE(review): relies on module-level names ``DATA_DIR``, ``os``, ``re``,
    ``pd``, ``np``, ``npd``, ``datetime`` and ``calc_edit_rate`` — confirm
    they are imported/defined elsewhere.
    """
    before_dir = '%s/before' % DATA_DIR
    after_dir = '%s/after' % DATA_DIR

    save_dir = '%s/evaluate/edit_rate' % DATA_DIR
    os.makedirs(save_dir, exist_ok=True)

    # ICAO codes (first 4 chars of each file name) available on each side.
    before_airports = os.listdir(before_dir)
    before_airports = {
        icao[:4]
        for icao in before_airports if re.match(r'^[A-Z]', icao)
    }

    after_airports = os.listdir(after_dir)
    after_airports = {
        icao[:4]
        for icao in after_airports if re.match(r'^[A-Z]', icao)
    }

    # Only airports present in both directories can be compared.
    airports_list = list(before_airports & after_airports)
    airports_list.sort()

    # airports_series = pd.Series(airports_list, name='ICAO')
    # airports_series.to_csv('airport_list.csv', index=False)

    df_edit_all = pd.DataFrame()
    for icao in airports_list:
        print(icao)
        df_before = npd.NWPFrame(
            pd.read_csv('%s/%s.txt' % (before_dir, icao), sep=','))
        df_before.strtime_to_datetime(date_key='date',
                                      fmt='%Y%m%d%H%M',
                                      inplace=True)
        df_before.index = df_before['date'].values
        # Base times at 12Z; each expands to 18 hourly validity times
        # (presumably 06-23 local time — confirm the timezone convention).
        before_bt = [bt for bt in df_before['date'] if bt.hour == 12]
        vt_list = []
        for bt in before_bt:
            vt = [bt + datetime.timedelta(hours=t) for t in range(18)]
            vt_list += vt

        df_before_06_23 = df_before.loc[vt_list]
        df_before_06_23 = npd.NWPFrame(df_before_06_23)
        df_before_06_23.dropna(inplace=True)
        df_before_06_23.datetime_to_strtime(date_key='date',
                                            fmt='%Y-%m-%d %H:%M',
                                            inplace=True)
        df_before_06_23.index = df_before_06_23['date'].values

        # Human-edited file is headerless; unused columns get dummy names.
        h_after = [
            'ICAO', 'BASE', 'VALID', 'precipitation', 'visibility', 'ceiling',
            'temperature', 'wind speed', 'wind direction', 'WX_after', 'u4'
        ]
        df_after = npd.NWPFrame(
            pd.read_csv('%s/%s.csv' % (after_dir, icao), names=h_after))
        df_after.strtime_to_datetime(date_key='VALID',
                                     fmt='%Y%m%d%H%M',
                                     inplace=True)
        df_after.index = df_after['VALID'].values

        df_after_06_23 = npd.NWPFrame(df_after[[
            'BASE', 'VALID', 'visibility', 'ceiling', 'wind speed',
            'wind direction', 'WX_after'
        ]])
        df_after_06_23.strtime_to_datetime(date_key='BASE',
                                           fmt='%Y%m%d%H%M',
                                           inplace=True)
        # Validity hours +6..+23 after each distinct base time.
        after_bt = list(df_after_06_23.drop_duplicates('BASE')['BASE'].values)
        vt_list = []
        for bt in after_bt:
            vt = [bt + np.timedelta64(t, 'h') for t in range(6, 24)]
            vt_list += vt

        # If none of the expected validity times exist in the edited data,
        # emit an all-zero edit-rate row for this airport and move on.
        idx_check = True
        for v in vt_list:
            if v in df_after_06_23.index:
                idx_check = False
                break
        if idx_check:
            df_edit = pd.DataFrame(
                [[icao,
                  len(df_after_06_23), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                columns=[
                    'ICAO', 'All', 'Vis edit', 'VIS edit rate', 'CIG edit',
                    'CIG edit rate', 'WNDSPD edit', 'WNDSPD edit rate',
                    'WDIR edit', 'WDIR edit rate', 'WX edit', 'WX edit rate'
                ])
            df_edit.to_csv('%s/edit_rate_%s.csv' % (save_dir, icao),
                           index=False)
            df_edit_all = pd.concat([df_edit_all, df_edit])
            continue

        df_after_06_23 = df_after_06_23.loc[vt_list]
        df_after_06_23 = npd.NWPFrame(df_after_06_23)
        df_after_06_23.dropna(inplace=True)
        df_after_06_23.datetime_to_strtime(date_key='VALID',
                                           fmt='%Y-%m-%d %H:%M',
                                           inplace=True)
        df_after_06_23.drop_duplicates('VALID', inplace=True)
        df_after_06_23.index = df_after_06_23['VALID'].values

        # print(df_after[['BASE', 'visibility', 'ceiling', 'wind speed', 'wind direction', 'WX_after']])
        # Join before/after on the shared date-string index and keep the
        # element pairs to be compared.
        vis = pd.concat([df_before_06_23, df_after_06_23], axis=1)
        vis = vis[[
            'ICAO', 'date', 'VIS', 'visibility', 'CLING', 'ceiling', 'WNDSPD',
            'wind speed', 'WNDDIR', 'wind direction', 'WX_after'
        ]]
        '''
        if len(df_before_06_23) > len(df_after_06_23):
            vis_index = df_after_06_23.index
        else:
            vis_index = df_before_06_23.index
        vis = vis.loc[vis_index]
        '''

        vis.rename(columns={
            'VIS': 'VIS_before',
            'visibility': 'VIS_after',
            'CLING': 'CIG_before',
            'ceiling': 'CIG_after',
            'WNDSPD': 'WNDSPD_before',
            'wind speed': 'WNDSPD_after',
            'WNDDIR': 'WNDDIR_before',
            'wind direction': 'WNDDIR_after'
        },
                   inplace=True)

        # vis.reset_index(drop=True, inplace=True)

        # Reporting-scale buckets: VIS_before values in
        # (vis_range[i], vis_range[i+1]] are snapped to vis_values[i].
        vis_range = [
            0, 25, 75, 125, 175, 225, 275, 325, 375, 450, 550, 625, 675, 725,
            775, 850, 950, 1050, 1150, 1250, 1350, 1450, 1550, 1650, 1750,
            1900, 2200, 2700, 3100, 3600, 4400, 4900, 5500, 6500, 7500, 8500,
            9500, 10000
        ]

        vis_values = [
            0, 50, 100, 150, 200, 250, 300, 350, 400, 500, 600, 650, 700, 750,
            800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800,
            2000, 2400, 3000, 3200, 4000, 4800, 5000, 6000, 7000, 8000, 9000,
            9999
        ]

        i = 0
        while True:
            idx = np.where((vis['VIS_before'] > vis_range[i])
                           & (vis['VIS_before'] <= vis_range[i + 1]))[0]
            idx = vis.index[idx]
            vis.loc[idx, 'VIS_before'] = vis_values[i]
            i += 1
            if i == len(vis_values):
                break

        # Ceiling: metres -> feet, then snap onto the ceiling scale.
        vis[['CIG_before', 'CIG_after']] *= 1 / 0.3048
        cig_range = [
            0, 15, 40, 75, 125, 175, 225, 275, 325, 375, 450, 550, 650, 750,
            850, 950, 1050, 1150, 1250, 1350, 1450, 1550, 1650, 1750, 1850,
            1950, 2050, 2150, 2250, 2350, 2450, 2550, 2650, 2750, 2850, 2950,
            3250, 3750, 4500, 100000
        ]

        cig_values = [
            0, 30, 50, 100, 150, 200, 250, 300, 350, 400, 500, 600, 700, 800,
            900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900,
            2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000,
            3500, 4000, ''
        ]

        i = 0
        while True:
            idx = np.where((vis['CIG_before'] > cig_range[i])
                           & (vis['CIG_before'] <= cig_range[i + 1]))[0]
            idx = vis.index[idx]
            vis.loc[idx, 'CIG_before'] = cig_values[i]
            i += 1
            if i == len(cig_values):
                break

        i = 0
        while True:
            idx = np.where((vis['CIG_after'] > cig_range[i])
                           & (vis['CIG_after'] <= cig_range[i + 1]))[0]
            idx = vis.index[idx]
            vis.loc[idx, 'CIG_after'] = cig_values[i]
            i += 1
            if i == len(cig_values):
                break

        # Wind speed: m/s -> knots, rounded; direction rounded to tens.
        vis['WNDSPD_before'] *= 1 / 0.514444
        vis = vis.round({'WNDSPD_before': 0})
        vis = vis.round({'WNDDIR_before': -1})

        # Build a METAR-style weather string for each 'before' row from the
        # WX_telop_* probabilities, precipitation intensity, thunderstorm
        # probability and visibility/temperature (fog/mist qualifiers).
        wx_list = ['', '', 'RA', 'SNRA', 'SN', 'SNRA', '', '', '']
        wx_telop = []
        for idx in vis.index:
            if idx in df_before_06_23.index:
                wx_prob = df_before_06_23.loc[idx, [
                    'WX_telop_100', 'WX_telop_200', 'WX_telop_300',
                    'WX_telop_340', 'WX_telop_400', 'WX_telop_430',
                    'WX_telop_500', 'WX_telop_600', 'WX_telop_610'
                ]]

                # Intensity prefix from 1-hour precipitation totals.
                prc = df_before_06_23.loc[idx, 'PRCRIN_1HOUR_TOTAL']
                if (prc >= 5.) and (prc < 10.):
                    wx = ''
                elif prc >= 10.:
                    wx = '+'
                else:
                    wx = '-'
                tnd = df_before_06_23.loc[idx, 'TNDSTM_prob']
                if tnd >= 50.:
                    wx += 'TS'
                wx += wx_list[int(np.argmax(wx_prob.values))]

                # NOTE(review): the FZFG branch is unreachable — v < 1000
                # is already caught by the FG branch above; confirm intent.
                v = df_before_06_23.loc[idx, 'VIS']
                tmpr = df_before_06_23.loc[idx, 'AIRTMP']
                if v < 1000.:
                    wx += ' FG'
                elif (v < 1000.) and (tmpr < 0.):
                    wx += ' FZFG'
                elif (v >= 1000.) and (v <= 5000.):
                    wx += ' BR'

                wx_telop.append(wx)
        wx_telop = pd.DataFrame(wx_telop,
                                index=df_before_06_23.index,
                                columns=['WX_before'])
        vis = pd.concat([vis, wx_telop], axis=1)
        vis['WX_after'] = vis['WX_after'].str.replace(' ', '')
        vis['WX_after'] = vis['WX_after'].str.replace('_', '-')
        vis.dropna(inplace=True)

        # '*' marks elements the human forecaster changed.
        vis_edit = np.where(vis['VIS_before'] != vis['VIS_after'], '*', '')
        cig_edit = np.where(vis['CIG_before'] != vis['CIG_after'], '*', '')
        wspd_edit = np.where(vis['WNDSPD_before'] != vis['WNDSPD_after'], '*',
                             '')
        wdir_edit = np.where(vis['WNDDIR_before'] != vis['WNDDIR_after'], '*',
                             '')
        wx_edit = np.where(vis['WX_before'] != vis['WX_after'], '*', '')

        vis_edit_rate = calc_edit_rate(vis_edit)
        cig_edit_rate = calc_edit_rate(cig_edit)
        wspd_edit_rate = calc_edit_rate(wspd_edit)
        wdir_edit_rate = calc_edit_rate(wdir_edit)
        wx_edit_rate = calc_edit_rate(wx_edit)

        vis['VIS edit'] = vis_edit
        vis['CIG edit'] = cig_edit
        vis['WNDSPD edit'] = wspd_edit
        vis['WDIR edit'] = wdir_edit
        vis['WX edit'] = wx_edit

        # Add here if time-series output is needed.
        columns = [
            'ICAO', 'date', 'VIS_before', 'VIS_after', 'VIS edit',
            'CIG_before', 'CIG_after', 'CIG edit', 'WNDSPD_before',
            'WNDSPD_after', 'WNDSPD edit', 'WNDDIR_before', 'WNDDIR_after',
            'WDIR edit', 'WX_before', 'WX_after', 'WX edit'
        ]
        vis = vis[columns]

        # Per-airport summary row: counts and rates of each edit type.
        df_edit = pd.DataFrame([[
            icao,
            len(vis_edit),
            len(vis_edit[vis_edit == '*']), vis_edit_rate,
            len(cig_edit[cig_edit == '*']), cig_edit_rate,
            len(wspd_edit[wspd_edit == '*']), wspd_edit_rate,
            len(wdir_edit[wdir_edit == '*']), wdir_edit_rate,
            len(wx_edit[wx_edit == '*']), wx_edit_rate
        ]],
                               columns=[
                                   'ICAO', 'All', 'Vis edit', 'VIS edit rate',
                                   'CIG edit', 'CIG edit rate', 'WNDSPD edit',
                                   'WNDSPD edit rate', 'WDIR edit',
                                   'WDIR edit rate', 'WX edit', 'WX edit rate'
                               ])

        df_edit.to_csv('%s/edit_rate_%s_06_23.csv' % (save_dir, icao),
                       index=False)
        df_edit_all = pd.concat([df_edit_all, df_edit])

    df_edit_all = df_edit_all.round(3)
    df_edit_all.to_csv('%s/edit_rate_all_06_23.csv' % save_dir, index=False)
Beispiel #10
0
def msm_airport_ft0(icaos):
    """Extract MSM forecast values at airport grid points into per-airport CSVs.

    For every MSM_INFO tag matching '4002200', walks the corresponding GRIB
    directory tree, reads the first message of each file, and records its
    value at each airport's nearest grid point (surface and upper layers use
    separate index tables).  Writes one CSV per airport plus a pickle of all
    frames.

    Args:
        icaos: iterable of 4-letter ICAO airport codes.
    """
    import re
    import glob
    import gc
    import pickle
    import pygrib
    import skynet.nwp2d as npd
    from skynet import MSM_INFO, MSM_DATA_DIR

    # Nearest-grid-point (lat, lon) indices per airport for each model layer.
    latlon = npd.msm.get_airport_latlon(icaos)
    sf_latlon_idx = npd.msm.latlon_to_indices(latlon, layer='surface')
    up_latlon_idx = npd.msm.latlon_to_indices(latlon, layer='upper')

    tagid_list = [
        tagid for tagid in MSM_INFO.keys() if re.match(r'4002200', tagid)
    ]
    tagid_list.sort()

    # One growing frame per airport: rows are validity dates, columns are
    # GRIB parameter names.
    df_airports = {icao: npd.NWPFrame() for icao in icaos}
    for icao in icaos:
        for tagid in tagid_list:
            meta = MSM_INFO[tagid]

            layer = meta['layer']

            path = '%s/%s/bt%s/vt%s%s' % (
                MSM_DATA_DIR, layer, meta['base time'],
                meta['first validity time'], meta['last validity time'])

            # Year directories (201*), each containing GRIB files.
            path_list = glob.glob('%s/201*' % path)
            path_list.sort()

            for p in path_list:
                print(p)
                msm_files = glob.glob('%s/201*' % p)
                msm_files.sort()
                for f in msm_files:
                    grbs = pygrib.open(f)
                    if layer == 'surface':
                        # Only the first GRIB message of each file is used.
                        # NOTE(review): the `continue` below skips
                        # grbs.close() for this file — possible handle leak.
                        grb = grbs.select()[0]
                        if grb is None:
                            continue
                        date = grb.validDate.strftime("%Y-%m-%d %H:%M")
                        param = grb.parameterName

                        lat = sf_latlon_idx[icao][0]
                        lon = sf_latlon_idx[icao][1]
                        df_airports[icao].loc[date, param] = grb.values[lat,
                                                                        lon]

                        # Free the (large) GRIB message eagerly.
                        del grb
                        gc.collect()

                    if layer == 'upper':
                        grb = grbs.select()[0]
                        if grb is None:
                            continue
                        date = grb.validDate.strftime("%Y-%m-%d %H:%M")
                        # Upper-air parameters are disambiguated by level.
                        param = grb.parameterName[:4] + str(grb.level)

                        lat = up_latlon_idx[icao][0]
                        lon = up_latlon_idx[icao][1]
                        df_airports[icao].loc[date, param] = grb.values[lat,
                                                                        lon]

                        del grb
                        gc.collect()

                    grbs.close()

        df_airports[icao].to_csv(
            '/Users/makino/PycharmProjects/SkyCC/data/msm_airport/%s.csv' %
            icao)
    pickle.dump(
        df_airports,
        open('/Users/makino/PycharmProjects/SkyCC/data/all_airport.pkl', 'wb'))
Beispiel #11
0
def main():
    """Evaluate SKYNET visibility predictions against METAR, human edits and area forecasts.

    For one airport, sweeps a list of confidence-factor thresholds; for each
    threshold it drops low-confidence predictions, reports how many human
    edits remain, writes per-threshold HTML confidence/confusion-matrix
    tables, and collects f1/recall/precision into a final score table.

    NOTE(review): relies on module-level names ``os``, ``np``, ``pd``,
    ``pickle``, ``set_visibility_metar``, ``set_visibility_area_forecast``,
    ``set_visibility_human_edit``, ``predict_by_period``,
    ``extract_different_index``, ``make_vis_table`` and
    ``make_conf_mat_by_period`` — confirm they are defined elsewhere.
    """
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from skynet import DATA_DIR, USER_DIR

    os.makedirs(os.getcwd() + "/confusion_matrix", exist_ok=True)

    icao = 'RJFK'
    '''
    'RJOT',
    'RJAA',
    'RJSC',
    'RJSI',
    'RJSK',
    'RJSM',
    'RJSN',
    'RJSS',
    'RJTT',
    'ROAH',
    'RJOC',
    'RJOO',
    # 'RJBB',
    'RJCC',
    'RJCH',
    'RJFF',
    'RJFK',
    'RJGG',
    'RJNK',
    'RJOA',
    '''

    # NOTE(review): data_dir, data_name and month_keys appear unused below.
    data_dir = '%s/ARC-common/fit_input/JMA_MSM/vis' % DATA_DIR
    model_dir = '%s/ARC-common/fit_output/JMA_MSM/vis' % DATA_DIR
    model_name = 'GLOBAL_METAR-%s.vis' % icao
    data_name = 'GLOBAL_METAR-%s.vis' % icao
    month_keys = ['month:1-2', 'month:3-4', 'month:5-6', 'month:7-8', 'month:9-10', 'month:11-12']

    X = npd.NWPFrame(pd.read_csv('/Users/makino/PycharmProjects/SkyCC/data/skynet/test_%s.csv' % icao, sep=','))

    # Preprocessing
    # X = preprocessing(X)

    # print(msm_data)

    # Split the data into time-series periods (two-month buckets by month).
    spX = skyds.convert.split_time_series(X, X['month'], date_fmt='%m')

    # metar
    metar = set_visibility_metar(icao)
    # metar = sync_values(base=metar, x=X[["visibility_rank"]])
    spmetar = skyds.convert.split_time_series(
        metar,
        metar["date"],
        date_fmt='%Y%m%d%H%M'
    )

    # area_forecast
    af = set_visibility_area_forecast(icao)
    spaf = skyds.convert.split_time_series(
        af,
        date=af["date"],
        date_fmt='%Y%m%d%H%M'
    )

    # human edit
    he = set_visibility_human_edit(icao)
    sphe = skyds.convert.split_time_series(
        he,
        date=he["date"],
        date_fmt='%Y%m%d%H%M'
    )

    # Prepare the models
    '''
    clfs = {}
    model_dir = '%s/PycharmProjects/SkyCC/trained_models' % USER_DIR
    for i_term, key in enumerate(spX):
        clfs[key] = [
            pickle.load(
                open("%s/%s/forest/%s/rf%03d.pkl" % (model_dir, icao, key, i), "rb"))
            for i in range(N_CLF[i_term])
        ]

    clfs = {}
    for i_term, key in enumerate(spX):
        os.makedirs('%s/%s/stacking' % (model_dir, key), exist_ok=True)
        clfs[key] = pickle.load(
            open('%s/%s.pkl' % (model_dir, model_name), 'rb'))
    '''

    clfs = pickle.load(open('%s/%s.pkl' % (model_dir, model_name), 'rb'))

    # Parameters
    confidence_list = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    confusion_matrix_threshold = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    score = pd.DataFrame()
    for t_num, threshold in enumerate(confidence_list):
        # Per-period prediction (with confidence factors).
        sppred = predict_by_period(spX, clfs, icao, smooth=False, confidence=True)

        # Convert X's index to date
        # X.index = X.strtime_to_datetime('date', fmt='%Y-%m-%d %H:%M')

        # Mark where the human edit differs from the area forecast.
        all_samples = 0
        for key in sphe:
            idx_edit = extract_different_index(sphe[key]["visibility_rank"],
                                               spaf[key]["visibility_rank"])
            edit = np.array(["" for _ in range(len(sphe[key]))])
            edit[idx_edit] = "*"
            sphe[key]["edit"] = edit
            all_samples += len(idx_edit)

        # Per-period vis_table.
        spvis = {}
        drop_list = [
            "metar_visibility",
            "metar_visibility_rank",
            "human_visibility",
            "human_visibility_rank",
            "skynet_visibility",
            "skynet_visibility_rank",
            'tmp'
        ]
        for key in sppred:
            vis = make_vis_table(metar=spmetar[key], he=sphe[key], ml=sppred[key])
            vis["skynet"] = np.round(vis["skynet_visibility_rank"]).astype(int)
            vis["metar"] = vis["metar_visibility_rank"].astype(int)
            vis["human"] = vis["human_visibility_rank"].astype(int)

            # NOTE(review): plt.show() blocks once per period/threshold —
            # presumably left over from debugging; confirm before batch runs.
            import matplotlib.pyplot as plt
            plt.figure()
            plt.plot(vis['metar_visibility_rank'].values)
            plt.plot(vis['skynet_visibility_rank'].values)
            plt.show()

            vis = vis.rename(columns={"edit": "tmp"})
            vis["edit"] = vis["tmp"]
            vis = vis.drop(drop_list, axis=1)

            spvis[key] = vis
            spvis[key].insert(0, 'date', spX[key]['date'].values)

        # Drop predictions whose confidence factor is below the threshold.
        samples = 0
        for key in spvis:
            os.makedirs(os.getcwd() + "/confidence_factor/%s/%s" % (key, icao), exist_ok=True)
            # Columns 0..8 hold the per-class confidence factors.
            confidence_map = spvis[key].loc[:, range(9)].values
            idx = confidence_map.argmax(axis=1)
            c_max = np.array([c[i] for i, c in zip(idx, confidence_map)])
            spvis[key] = spvis[key].iloc[c_max >= threshold]
            spvis[key].to_html(os.getcwd() + "/confidence_factor/%s/%s/%s_%s.html" % (key, icao, icao, threshold))

            edit = spvis[key]["edit"]
            samples += len([e for e in edit if e == "*"])

        print("all sample :", all_samples)
        print("samples :", samples)
        print("samples / all samples = %0.3f" % (samples / all_samples))
        print()

        # Per-period confusion matrices (restricted to the surviving rows).
        for key in sppred:
            idx = spvis[key].index
            sppred[key] = sppred[key].loc[idx]

        cfm_he, cfm_ml, cfm_heml = make_conf_mat_by_period(metar=spmetar, he=sphe, ml=sppred,
                                                           threats=confusion_matrix_threshold)

        # Sum the per-period matrices into full-year totals.
        cfm_he1y = 0
        cfm_ml1y = 0
        for key in cfm_he:
            cfm_he1y += cfm_he[key]
            cfm_ml1y += cfm_ml[key]

        os.makedirs(
            os.getcwd() + "/confusion_matrix/%dx%d/metar_vs_human/%s"
            % (len(confusion_matrix_threshold), len(confusion_matrix_threshold), icao), exist_ok=True
        )
        os.makedirs(
            os.getcwd() + "/confusion_matrix/%dx%d/metar_vs_ml/%s"
            % (len(confusion_matrix_threshold), len(confusion_matrix_threshold), icao), exist_ok=True
        )

        cfm_he1y.to_html(
            os.getcwd() + "/confusion_matrix/%dx%d/metar_vs_human/%s/%s_%s.html"
            % (len(confusion_matrix_threshold), len(confusion_matrix_threshold), icao, icao, threshold),
        )
        cfm_ml1y.to_html(
            os.getcwd() + "/confusion_matrix/%dx%d/metar_vs_ml/%s/%s_%s.html"
            % (len(confusion_matrix_threshold), len(confusion_matrix_threshold), icao, icao, threshold),
        )

        print(cfm_he1y)
        print()
        print(cfm_ml1y)
        print()

        # Recall/precision/f1 from the top-left 2x2 of the ML matrix.
        mat = cfm_ml1y.values
        rs = mat[0, 0] / (mat[0, 0] + mat[0, 1])
        ps = mat[0, 0] / (mat[0, 0] + mat[1, 0])
        f1 = 2 * rs * ps / (rs + ps)
        print("total: f1 = %0.3f, recall = %0.3f, precision = %0.3f" % (f1, rs, ps))
        print()

        # NOTE(review): DataFrame.append was removed in pandas 2.0 — this
        # requires an older pandas; confirm the pinned version.
        score = score.append([
            [
                threshold,
                # all_samples,
                # samples,
                # samples / all_samples,
                f1,
                rs,
                ps
            ]
        ])

    score.columns = ["confidence",
                     # "number of edit",
                     # "edit reduction", "%",
                     "f1",
                     "recall",
                     "precision"]
    score = score.round(3)
    # score["%"] *= 100
    print(score)

    os.makedirs(os.getcwd() + "/score", exist_ok=True)
    score.to_html(os.getcwd() + "/score/%s.html" % icao, index=False)
Beispiel #12
0
def main():
    """Compare stacking, random forest and mlxtend StackingClassifier on one airport's Jan-Feb data.

    Loads train/test sets for RJAA, standardizes and balances the training
    data, holds out one of five blocks as a test fold, fits the three model
    variants, and prints the f1 score of each on the binarized target
    (rank <= 1 -> positive).

    NOTE(review): relies on module-level names ``pd``, ``np``, ``preprocess``,
    ``SkySVM``, ``SkyStacking``, ``LogisticRegression`` and
    ``StackingClassifier`` — confirm they are imported elsewhere in the file.
    """
    import skynet.nwp2d as npd
    import skynet.datasets as skyds
    from skynet import DATA_DIR
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import f1_score

    icao = "RJAA"

    train_data_dir = '%s/MSM/airport.process' % DATA_DIR
    test_data_dir = '%s/skynet' % DATA_DIR

    train = skyds.read_csv('%s/%s.csv' % (train_data_dir, icao))
    test = skyds.read_pkl('%s/test_%s.pkl' % (test_data_dir, icao))

    # Normalize the test 'date' column and prepend integer date-part columns.
    test['date'] = test['date'].astype(int).astype(str)
    test = npd.NWPFrame(test)
    test.strtime_to_datetime('date', '%Y%m%d%H%M', inplace=True)
    test.datetime_to_strtime('date', '%Y-%m-%d %H:%M', inplace=True)
    df_date = test.split_strcol(
        'date', ['year', 'month', 'day', 'hour', 'min'], r'[-\s:]'
    )[['month', 'day', 'hour', 'min']].astype(int)
    test = pd.concat([df_date, test], axis=1)

    fs = skyds.get_init_features()
    target = skyds.get_init_target()

    train = train[fs + target]
    test = test[fs + target]

    # Restrict both sets to January and February.
    train = train[(train['month'] == 1) | (train['month'] == 2)]
    test = test[(test['month'] == 1) | (test['month'] == 2)]

    # Last column is the target; the rest are features.
    X = train.iloc[:, :-1]
    y = train.iloc[:, -1]

    ss = StandardScaler()
    X = ss.fit_transform(X)
    y = y.values

    # Rebalance the class distribution of the training data.
    X, y = skyds.convert.balanced(X, y)

    spX, spy = skyds.convert.split_blocks(X, y, n_folds=5)
    print(spX)

    # Block 0 is held out as the test fold; the rest become the train set.
    spX, spy = preprocess.split(X, y, n_folds=5)
    X = pd.concat([spX[n] for n in spX if n != 0]).reset_index(drop=True)
    y = pd.concat([spy[n] for n in spy if n != 0]).reset_index(drop=True)

    X_test = spX[0].reset_index(drop=True)
    y_test = spy[0].reset_index(drop=True)

    from sklearn.ensemble import RandomForestClassifier
    clf1 = RandomForestClassifier(max_features=2)
    clf2 = SkySVM()
    meta = LogisticRegression()

    # Training
    # (Note) data is not balanced at this point
    sta = SkyStacking((clf1, clf2), meta)
    sta.fit(X, y)
    p = sta.predict(X_test)

    # Plain random forest baseline; also print feature importance ranking.
    clf1.fit(X.values, y.values[:, 0])
    print(np.array(X.keys())[np.argsort(clf1.feature_importances_)[::-1]])
    p_rf = clf1.predict(X_test.values)

    # Stacking via mlxtend
    sc = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    sc.fit(X.values, y.values[:, 0])
    p_sc = sc.predict(X_test.values)

    # Binarize: visibility rank <= 1 counts as the positive class.
    y_test = np.where(y_test.values[:, 0] > 1, 0, 1)
    p = np.where(p > 1, 0, 1)
    p_rf = np.where(p_rf > 1, 0, 1)
    p_sc = np.where(p_sc > 1, 0, 1)

    f1 = f1_score(y_true=y_test, y_pred=p)
    print("stacking", f1)

    f1_rf = f1_score(y_true=y_test, y_pred=p_rf)
    print("random forest", f1_rf)

    f1_sc = f1_score(y_true=y_test, y_pred=p_sc)
    print("stacked classifier", f1_sc)