Exemple #1
0
def test_windows_model():
    """Predict with the ``windows_model`` models on the test set and score it.

    Steps performed:

    1. Make sure ``data_path_1 + "test.csv"`` exists, creating it from the
       NetCDF test file with ``transfer_data_to_csv`` when it is missing.
    2. Fill missing values of every ``*_obs`` column from the matching
       ``*_M`` column (``psur_obs`` is filled from ``psfc_M``).
    3. Run ``exract_feature`` and load the three pickled models.
    4. Call ``predict`` with the module-level ``prd_time``, write the result to
       ``output_path + ans_name`` and pass that file to ``print_score``.
    """
    from windows_model import model_t2m_file, model_rh2m_file, model_w10m_file, exract_feature

    test_file = "../data/ai_challenger_wf2018_testb1_20180829-20181028.nc"
    # NOTE(review): the original bound '2018-10-28 03' to the unused locals
    # `out_time` and `pprd_time` (probably a typo for `prd_time`). The call
    # below has always used the module-level `prd_time`; confirm that value
    # is the intended prediction time.
    csv_path = data_path_1 + "test.csv"
    if not os.path.exists(csv_path):
        transfer_data_to_csv(test_file, csv_path)
    test_df = load_data(csv_path)
    # test_df = fill_missing_data(test_df)

    obs_columns = [
        'psur_obs', 't2m_obs', 'q2m_obs', 'w10m_obs', 'd10m_obs',
        'rh2m_obs', 'u10m_obs', 'v10m_obs', 'RAIN_obs',
    ]
    for col in obs_columns:
        if 'psur' in col:
            col_filled = 'psfc_M'
        else:
            col_filled = col.split('_')[0] + '_M'
        # Fill gaps with the supercomputer (model) values. Assign the result
        # back instead of using a chained in-place fillna, which does not
        # reliably modify the frame on recent pandas versions.
        test_df[col] = test_df[col].fillna(test_df[col_filled])
    test_df_processed = exract_feature(test_df, True)

    # Load the models, closing each file as soon as it has been read.
    # pickle can execute arbitrary code: only load trusted model files.
    models = []
    for model_path in (model_t2m_file, model_rh2m_file, model_w10m_file):
        with open(model_path, 'rb') as model_fh:
            models.append(pickle.load(model_fh))

    # Predict and write the submission file.
    df_submit = predict(test_df_processed, models, prd_time)
    anen_file = output_path + ans_name
    df_submit.to_csv(anen_file, index=False)

    # Compute the score.
    print_score(fore_file, obs_file, anen_file)
Exemple #2
0
def test_old_days_model3():
    """Predict with the ``old_days_model3`` models for ``prd_time`` and score it.

    Steps performed:

    1. Load ``../data/all_data.csv`` and run ``exract_feature`` on it.
    2. Keep the rows whose ``station_date_time`` has, as its second
       ``'_'``-separated field, the module-level ``prd_time`` with ``'-'`` and
       ``' '`` removed.
    3. Drop the observation/target columns and build the feature matrix from
       column 1 plus columns 3 onwards of what remains.
    4. Add each model's prediction to the matching ``*_M`` column, write the
       result to ``output_path + ans_name`` and pass it to ``print_score``.
    """
    from old_days_model3 import model_t2m_file, model_rh2m_file, model_w10m_file, exract_feature

    df = load_data("../data/all_data.csv")
    df_processed = exract_feature(df)
    del df  # release the raw frame before the next large allocation

    # Select rows with a boolean mask directly; going through `.index` and
    # `.loc` would repeat rows if the index labels were not unique.
    target_stamp = prd_time.replace('-', '').replace(' ', '')
    row_stamp = df_processed.station_date_time.str.split('_').str[1]
    df_test = df_processed[row_stamp == target_stamp]
    del df_processed

    # df_test.drop(df_test.columns[33:45], axis=1, inplace=True)
    df_test = df_test.drop(columns=[
        'psur_obs', 't2m_obs', 'q2m_obs', 'w10m_obs', 'd10m_obs', 'rh2m_obs',
        'u10m_obs', 'v10m_obs', 'RAIN_obs', 't2m_obj', 'rh2m_obj', 'w10m_obj',
    ])
    df_test_X = pd.concat(
        [df_test[df_test.columns[1]], df_test[df_test.columns[3:]]], axis=1)

    # Load the models, closing each file as soon as it has been read.
    # pickle can execute arbitrary code: only load trusted model files.
    with open(model_t2m_file, 'rb') as model_fh:
        t2m_model = pickle.load(model_fh)
    with open(model_rh2m_file, 'rb') as model_fh:
        rh2m_model = pickle.load(model_fh)
    with open(model_w10m_file, 'rb') as model_fh:
        w10m_model = pickle.load(model_fh)

    # Row key: first field of station_date_time plus the zero-padded third
    # field. Each model output is added to the corresponding *_M column.
    df_submit = pd.DataFrame({
        'FORE_data': [
            f'{item[0]}_{int(item[2]):02d}'
            for item in df_test.station_date_time.str.split('_')
        ],
        't2m': (df_test.t2m_M + t2m_model.predict(df_test_X)).tolist(),
        'rh2m': (df_test.rh2m_M + rh2m_model.predict(df_test_X)).tolist(),
        'w10m': (df_test.w10m_M + w10m_model.predict(df_test_X)).tolist(),
    })

    # Write the submission file.
    anen_file = output_path + ans_name
    df_submit.to_csv(anen_file, index=False)

    # Compute the score.
    print_score(fore_file, obs_file, anen_file)
Exemple #3
0
def test_windows_station_model():
    """Predict per station with ``windows_station_model`` models and score it.

    Steps performed:

    1. Make sure ``data_path_1 + "test.csv"`` exists, creating it from the
       NetCDF test file with ``transfer_data_to_csv`` when it is missing.
    2. Fill missing values of every ``*_obs`` column from the matching
       ``*_M`` column (``psur_obs`` is filled from ``psfc_M``).
    3. Run ``exract_feature`` and split the result by the ``stations`` column
       for station ids 90001..90010.
    4. Load the three pickled objects, each indexed by the station id as a
       string, call ``predict`` once per station with the module-level
       ``prd_time`` and concatenate the results in station order.
    5. Write the result to ``output_path + ans_name`` and pass that file to
       ``print_score``.
    """
    from windows_station_model import model_t2m_file, model_rh2m_file, model_w10m_file, exract_feature

    test_file = "../data/ai_challenger_wf2018_testb1_20180829-20181028.nc"
    # NOTE(review): the original bound '2018-10-28 03' to the unused locals
    # `out_time` and `pprd_time` (probably a typo for `prd_time`). The calls
    # below have always used the module-level `prd_time`; confirm that value
    # is the intended prediction time.
    csv_path = data_path_1 + "test.csv"
    if not os.path.exists(csv_path):
        transfer_data_to_csv(test_file, csv_path)
    test_df = load_data(csv_path)
    # test_df = fill_missing_data(test_df)

    obs_columns = [
        'psur_obs', 't2m_obs', 'q2m_obs', 'w10m_obs', 'd10m_obs',
        'rh2m_obs', 'u10m_obs', 'v10m_obs', 'RAIN_obs',
    ]
    for col in obs_columns:
        if 'psur' in col:
            col_filled = 'psfc_M'
        else:
            col_filled = col.split('_')[0] + '_M'
        # Fill gaps with the supercomputer (model) values. Assign the result
        # back instead of using a chained in-place fillna, which does not
        # reliably modify the frame on recent pandas versions.
        test_df[col] = test_df[col].fillna(test_df[col_filled])

    station_id = [
        90001, 90002, 90003, 90004, 90005, 90006, 90007, 90008, 90009, 90010
    ]
    test_df_processed = exract_feature(test_df, True)
    test_station_df = {
        str(sid): test_df_processed[test_df_processed["stations"] == sid]
        for sid in station_id
    }

    # Load the models, closing each file as soon as it has been read.
    # pickle can execute arbitrary code: only load trusted model files.
    with open(model_t2m_file, 'rb') as model_fh:
        t2m_model = pickle.load(model_fh)
    with open(model_rh2m_file, 'rb') as model_fh:
        rh2m_model = pickle.load(model_fh)
    with open(model_w10m_file, 'rb') as model_fh:
        w10m_model = pickle.load(model_fh)

    # One prediction frame per station, joined with a single concat
    # (DataFrame.append no longer exists in pandas >= 2.0).
    station_frames = []
    for sid in station_id:
        key = str(sid)
        station_frames.append(
            predict(test_station_df[key],
                    [t2m_model[key], rh2m_model[key], w10m_model[key]],
                    prd_time))
    df_submit = pd.concat(station_frames)

    # Write the submission file.
    anen_file = output_path + ans_name
    df_submit.to_csv(anen_file, index=False)

    # Compute the score.
    print_score(fore_file, obs_file, anen_file)