Beispiel #1
0
def sorted_ob_fo_muti_model(ob, fo_list, save_path=None):
    '''
    Plot sorted forecasts against sorted observations, one subplot per model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last column of the merged frame is
    taken as the observation and columns ``7:-1`` as the forecasts; each
    series is sorted independently before plotting.

    :param ob: observation data, a DataFrame.
    :param fo_list: list of forecast DataFrames, one per model.  Per the
        original notes, the last (value) column of every frame must have a
        unique name, while the leading space/time columns share their names.
        The list is not modified.
    :param save_path: file the figure is saved to; when None the figure is
        shown instead.
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would grow the caller's
    # list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    # Loop-invariant, so sort the observation only once.
    ob_sorted = np.sort(merged.iloc[:, -1].values)

    # Columns 7:-1 hold the forecasts (presumably columns 0-6 are the shared
    # space/time columns -- confirm against merge_on_id_and_obTime).
    model_columns = merged.columns[7:-1]
    # One panel per forecast column; sizing the grid from len(fo_list) after
    # the observation was appended left an empty panel on the right.
    n_panels = max(len(model_columns), 1)
    plt.figure(figsize=[10 * n_panels, 4.8])

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])
    plt.suptitle(title)

    for position, column in enumerate(model_columns, start=1):
        plt.subplot(1, n_panels, position)
        plt.plot(np.sort(merged[column].values), ob_sorted)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
Beispiel #2
0
def contingency_table_multi_mode(ob, fo_list, grade=None, save_path='contingency_table.xls', sheet_name='sheet1'):
    '''
    Write one contingency table per forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is the
    observation and columns ``7:-1`` are the forecasts; each forecast is
    passed to ``yon.table.contingency_table`` with the forecast column name
    as the sheet name.

    :param ob: observation data, a DataFrame.
    :param fo_list: list of forecast DataFrames, one per model.  Per the
        original notes, the last (value) column of every frame must have a
        unique name, while the leading space/time columns share their names.
        The list is not modified.
    :param grade: grade/threshold forwarded to
        ``yon.table.contingency_table``.
    :param save_path: NOTE(review): currently overridden -- the output file
        is always ``<derived title>.xls``; kept as-is so existing callers
        keep getting the same file names.
    :param sheet_name: NOTE(review): currently unused -- every sheet is named
        after its forecast column.
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would grow the caller's
    # list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob_data = merged.iloc[:, -1].values

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    # Turn the title into a file name once, after it is complete (this used
    # to sit inside the loop above).  Dropping the last 13 characters
    # presumably trims the seconds/nanoseconds of a numpy datetime string --
    # confirm; the translate call then strips all punctuation.
    if ':' in title:
        title = title[:-13]
        title = title.translate(str.maketrans(':', ':', string.punctuation))
    save_path = title + '.xls'

    for fo_of_colnum in merged.columns[7:-1]:
        fo_of_data = merged[fo_of_colnum].values
        yon.table.contingency_table(ob_data, fo_of_data, grade=grade, save_path=save_path,
                                    sheet_name=fo_of_colnum)
Beispiel #3
0
def box_plot_muti_model(ob, fo_df_list, save_path=None, x_lable='observation', y_lable='forecast', title='box-plot'):
    '''
    Draw a box plot of the observation and every forecast model side by side.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The observation (last merged column) is
    moved to position 7 under the name ``'ob'`` so that it is drawn first,
    followed by one box per forecast column.

    :param ob: observation data, a DataFrame.
    :param fo_df_list: list of forecast DataFrames, one per model.  The list
        is not modified.
    :param save_path: file the figure is saved to; when None the figure is
        shown instead.
    :param x_lable: NOTE(review): currently unused by this function.
    :param y_lable: NOTE(review): currently unused by this function.
    :param title: fallback title, used only when no title can be derived from
        the ``level`` / ``id`` / ``time`` columns.
    :return: None
    '''
    # Merge a copy: appending to ``fo_df_list`` itself would grow the
    # caller's list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_df_list) + [ob])

    # Only keys with a single distinct value contribute to the title.
    derived_title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            derived_title += str(distinct[0])

    # Move the observation from the last column to the front of the value
    # columns so its box is drawn first.
    ob_values = merged.iloc[:, -1]
    merged.drop(merged.columns[-1], axis=1, inplace=True)
    merged.insert(7, 'ob', ob_values)

    labels = merged.columns[7:]
    # boxplot expects one sequence per box, hence the transpose.
    boxes = tuple(merged.iloc[:, 7:].values.T)
    plt.boxplot(boxes, labels=labels)
    # The ``title`` argument used to be discarded unconditionally; it now
    # serves as the fallback when nothing could be derived from the data.
    plt.title(derived_title or title)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
Beispiel #4
0
def multi_mode_and_multi_classification_predictive_contingency_table(
        ob, fo_list, grade_list=None, save_path=None):
    '''
    Write a multi-category contingency table for every forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is the
    observation and columns ``7:-1`` are the forecasts; each forecast is
    passed to ``table.multi_category_contingency_table``.

    :param ob: observation data, a DataFrame.
    :param fo_list: list of forecast DataFrames, one per model.  Per the
        original notes, the last (value) column of every frame must have a
        unique name, while the leading space/time columns share their names.
        The list is not modified.
    :param grade_list: list of category thresholds, forwarded unchanged.
    :param save_path: NOTE(review): currently overridden -- the output file
        is always ``<derived title>.xls``, and every model is written to that
        same path; verify how the table helper handles repeated writes.
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would grow the caller's
    # list by one observation frame on every call.  (A leftover debug dump of
    # the merged frame to 'aa.csv' was removed here.)
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob_data = merged.iloc[:, -1].values

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    # Dropping the last 13 characters presumably trims the
    # seconds/nanoseconds of a numpy datetime string -- confirm; the
    # translate call then strips all punctuation.
    if ':' in title:
        title = title[:-13]
        title = title.translate(str.maketrans(':', ':', string.punctuation))
    save_path = title + '.xls'

    for fo_of_colnum in merged.columns[7:-1]:
        fo_of_data = merged[fo_of_colnum].values
        table.multi_category_contingency_table(ob_data,
                                               fo_of_data,
                                               grade_list=grade_list,
                                               save_path=save_path)
Beispiel #5
0
def reliability_diagrams_muti_model(ob,
                                    fo_list,
                                    grade_list=None,
                                    save_path=None,
                                    diagona_color='r',
                                    regression_line_color='g',
                                    broken_line_color='b'):
    '''
    Draw one reliability diagram per forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is the
    observation and columns ``7:-1`` are the forecasts.  For every bin
    ``(level[i-1], level[i]]`` the observed relative frequency is the share
    of forecasts in that bin whose observation equals 1; it is plotted at the
    bin's upper edge, together with a linear regression through the points
    and the diagonal.

    :param ob: observation data, a DataFrame (compared against 1, i.e. the
        event indicator).
    :param fo_list: list of forecast DataFrames, one per model.  The list is
        not modified.
    :param grade_list: bin edges of the forecast probability; defaults to
        0, 0.1, ..., 1.0 when None.
    :param save_path: file the figure is saved to; when None the figure is
        shown instead.
    :param diagona_color: colour of the diagonal reference line.
    :param regression_line_color: colour of the regression line.
    :param broken_line_color: colour of the observed-frequency line/points.
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would grow the caller's
    # list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob = merged.iloc[:, -1].values

    model_columns = merged.columns[7:-1]
    # One panel per forecast column (sizing from len(fo_list) after the
    # observation was appended left an empty panel on the right).
    n_panels = max(len(model_columns), 1)
    plt.figure(figsize=[6.4 * n_panels, 4.8])

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])
    plt.suptitle(title)

    if grade_list is None:
        # Default probability levels.  The former np.arange(0, 1.0, 10) has a
        # step of 10 and therefore produced the single level [0.].
        clevs = np.linspace(0, 1, 11)
    else:
        clevs = grade_list
    levels = np.array(clevs)
    X = levels.reshape((len(levels), -1))

    for position, column in enumerate(model_columns, start=1):
        fo = merged[column].values
        plt.subplot(1, n_panels, position)

        # The first level has no bin below it; it keeps the placeholder 0.
        orfs = [0]
        for lower, upper in zip(levels[:-1], levels[1:]):
            in_bin = (fo > lower) & (fo <= upper)
            n_in_bin = np.count_nonzero(in_bin)
            if n_in_bin:
                # Divide by the number of samples in the bin.  The previous
                # len(np.where(...)) was the tuple length (always 1), which
                # turned the frequency into a raw count.
                orfs.append(np.count_nonzero(ob[in_bin] == 1) / n_in_bin)
            else:
                # Empty bin: frequency undefined; shown as a gap.
                orfs.append(np.nan)
        orfs = np.array(orfs, dtype=float)

        # Fit only on bins that actually contain data.
        usable = ~np.isnan(orfs)
        model = LinearRegression().fit(X[usable], orfs[usable])
        y = model.predict(X)

        plt.plot(X, y, color=regression_line_color)
        plt.plot(clevs, orfs, color=broken_line_color)
        plt.scatter(clevs, orfs, color=broken_line_color)
        plt.plot([0, 1], [0, 1], color=diagona_color)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
Beispiel #6
0
def scatter_regress_muti_model(ob, fo_df_list, save_path=None, scattercolor='r', scattersize=5,
                               x_label='fo', y_label='bo', fontsize=10, line_color='r'):
    '''
    Draw, per forecast model, a scatter plot of observation vs. forecast with
    a linear regression line.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is the
    observation (x axis) and columns ``7:-1`` are the forecasts (y axis).

    :param ob: observation data, a DataFrame.
    :param fo_df_list: list of forecast DataFrames, one per model.  Per the
        original notes, the last (value) column of every frame must have a
        unique name, while the leading space/time columns share their names.
        The list is not modified.
    :param save_path: file the figure is saved to; when None the figure is
        shown instead.
    :param scattercolor: face colour of the scatter markers.
    :param scattersize: size of the scatter markers.
    :param x_label: x-axis label.  NOTE(review): the x axis carries the
        observation, so the default 'fo' looks swapped with ``y_label``
        (whose default 'bo' is presumably a typo for 'ob'); defaults are left
        untouched for compatibility.
    :param y_label: y-axis label.
    :param fontsize: font size of both axis labels.
    :param line_color: format/colour of the regression line.
    :return: None
    '''
    # Merge a copy: appending to ``fo_df_list`` itself would grow the
    # caller's list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_df_list) + [ob])
    ob = merged.iloc[:, -1].values

    model_columns = merged.columns[7:-1]
    # One panel per forecast column (sizing from len(fo_df_list) after the
    # observation was appended left an empty panel on the right).
    n_panels = max(len(model_columns), 1)
    plt.figure(figsize=[6.4 * n_panels, 4.8])

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])
    plt.suptitle(title)

    # Loop-invariant regression inputs: the observation as a single feature
    # column, and 30 evenly spaced abscissae covering [0, max(ob)).
    # linspace replaces arange(0, max, max / 30), which fails when
    # max(ob) == 0 because the step becomes zero.
    ob_feature = np.zeros((len(ob), 1))
    ob_feature[:, 0] = ob
    ob_line = np.linspace(0, np.max(ob), 30, endpoint=False)
    line_feature = ob_line.reshape(-1, 1)

    for position, column in enumerate(model_columns, start=1):
        fo = merged[column].values
        plt.subplot(1, n_panels, position)
        plt.plot(ob, fo, 'o', markerfacecolor=scattercolor, markersize=scattersize)

        combined = np.hstack((ob, fo))
        num_max = combined.max()
        num_min = combined.min()

        clf = LinearRegression().fit(ob_feature, fo)
        fo_rg = clf.predict(line_feature)
        plt.plot(ob_line, fo_rg, line_color)

        # Pad by 20 % of the magnitude.  Using abs() keeps the padding
        # pointing outwards for negative values; ``min - min / 5`` moved the
        # left limit inside the data and clipped points.
        plt.xlim(num_min - abs(num_min) / 5, num_max + abs(num_max) / 5)
        plt.xlabel(x_label, size=fontsize)
        plt.ylabel(y_label, size=fontsize)
        plt.title(column)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
Beispiel #7
0
def multi_mode_and_multi_classification_predictive_contingency_table(ob, fo_list, grade_list=None, save_path=None):
    '''
    Write a multi-category contingency table for every forecast model into
    one Excel workbook, one sheet per model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is the
    observation and columns ``7:-1`` are the forecasts.  The workbook is
    named ``<derived title>.xlsx``.

    :param ob: observation data, a DataFrame.
    :param fo_list: list of forecast DataFrames, one per model.  Per the
        original notes, the last (value) column of every frame must have a
        unique name, while the leading space/time columns share their names.
        The list is not modified.
    :param grade_list: list of category thresholds, forwarded unchanged.
    :param save_path: directory the workbook is written to; when None the
        workbook is written relative to the current working directory.
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would grow the caller's
    # list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob_data = merged.iloc[:, -1].values

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    # Dropping the last 13 characters presumably trims the
    # seconds/nanoseconds of a numpy datetime string -- confirm; the
    # translate call then strips all punctuation.
    if ':' in title:
        title = title[:-13]
        title = title.translate(str.maketrans(':', ':', string.punctuation))

    if save_path is None:
        save_path = title + '.xlsx'
    else:
        save_path = save_path + '/' + title + '.xlsx'

    pathlib.Path(save_path).touch()
    # The context manager closes the writer even when a table call raises,
    # so the file handle is never left open.
    with pd.ExcelWriter(save_path) as writer:
        for fo_of_colnum in merged.columns[7:-1]:
            fo_of_data = merged[fo_of_colnum].values
            table.multi_category_contingency_table(ob_data, fo_of_data, grade_list=grade_list,
                                                   is_append_sheet=True, excel_writer=writer,
                                                   sheet_name=fo_of_colnum)
Beispiel #8
0
def _level_frequencies(values, clevs):
    '''Return the share of ``values`` in each ``[clevs[i], clevs[i + 1])``
    bin, followed by the share in the open-ended ``>= clevs[-1]`` bin.'''
    total = len(values)
    shares = [
        np.count_nonzero((values >= low) & (values < high)) / total
        for low, high in zip(clevs[:-1], clevs[1:])
    ]
    shares.append(np.count_nonzero(values >= clevs[-1]) / total)
    return shares


def frequency_histogram_muti_model(ob, fo_list, clevs, x_lable='frequency', save_path=None,
                                   y_lable='range', left_label='Obs', right_label='Pred',
                                   left_color='r', right_color='b', legend_location="upper right", width=0.2):
    '''
    Compare, per forecast model, how often observation and forecast fall into
    each level range, as paired bars.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is the
    observation and columns ``7:-1`` are the forecasts.

    :param ob: observation data, a DataFrame.
    :param fo_list: list of forecast DataFrames, one per model.  Per the
        original notes, the last (value) column of every frame must have a
        unique name, while the leading space/time columns share their names.
        The list is not modified.
    :param clevs: list of level edges; bins are ``[clevs[i], clevs[i + 1])``
        plus a final ``>= clevs[-1]`` bin.
    :param x_lable: x-axis label.  NOTE(review): the x axis carries the level
        ranges, so the defaults of ``x_lable`` and ``y_lable`` look swapped;
        they are left untouched for compatibility.
    :param save_path: file the figure is saved to; when None the figure is
        shown instead.
    :param y_lable: y-axis label.
    :param left_label: legend label of the left (observation) bars.
    :param right_label: legend label of the right (forecast) bars.
    :param left_color: colour of the left (observation) bars.
    :param right_color: colour of the right (forecast) bars.
    :param legend_location: legend position, passed as ``loc``.
    :param width: width of a single bar.
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would grow the caller's
    # list by one observation frame on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob = merged.iloc[:, -1].values

    model_columns = merged.columns[7:-1]
    data_len = max(len(model_columns), 1)
    # squeeze=False always yields a 2-D array of Axes; without it a single
    # model returns a bare Axes object and indexing it fails.
    _, axs = plt.subplots(1, data_len, figsize=(4 * data_len, 4), squeeze=False)
    plt.subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=0.88,
                        wspace=0.2, hspace=0.2)

    # Only keys with a single distinct value contribute to the title.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    # Everything that depends only on the observation or the levels is the
    # same for every model, so compute it once.
    p_ob = _level_frequencies(ob, clevs)
    xticklabels = [str(low) + '-' + str(high) for low, high in zip(clevs[:-1], clevs[1:])]
    xticklabels.append('>=' + str(clevs[-1]))
    x = np.arange(0, len(p_ob))
    # Half a bar width keeps the pair adjacent for any ``width`` (the former
    # fixed 0.1 only matched the default width of 0.2).
    offset = width / 2

    for index, fo_of_colnum in enumerate(model_columns):
        fo = merged[fo_of_colnum].values
        axe1 = axs[0, index]
        p_fo = _level_frequencies(fo, clevs)

        # The "left" series really goes on the left now (it used to be drawn
        # at x + 0.1, i.e. on the right).
        axe1.bar(x - offset, p_ob, width=width, facecolor=left_color, label=left_label)
        axe1.bar(x + offset, p_fo, width=width, facecolor=right_color, label=right_label)
        axe1.legend(loc=legend_location)
        axe1.set_xlabel(x_lable, fontsize=10)
        axe1.set_xticks(x)
        axe1.set_xticklabels(xticklabels, fontsize=9)
        axe1.set_ylabel(y_lable, fontsize=10)
        axe1.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(100))
        axe1.set_title(fo_of_colnum)

    plt.suptitle(title)
    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)