def sorted_ob_fo_muti_model(ob, fo_list, save_path=None):
    '''
    Plot sorted observations against sorted forecasts, one subplot per model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts
    (the leading columns are presumably the shared space/time metadata --
    confirm against ``pisd``).

    :param ob: observation data, a DataFrame
    :param fo_list: forecast data of several models, a list of DataFrames;
        the last column of every frame must have a distinct name and the
        leading space/time columns must be named identically.  The list is
        not modified.
    :param save_path: file the figure is saved to; when ``None`` the figure
        is shown instead
    :return: None
    '''
    # Merge a copy: appending to ``fo_list`` itself would leak the
    # observation frame into the caller's list on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])

    # The observation is the same for every model, so sort it once.
    ob_sorted = np.sort(merged.iloc[:, -1].values)

    # One panel per forecast column; sizing the grid by the merged input
    # count (forecasts + observation) left a trailing empty panel.
    fo_columns = merged.columns[7:-1]
    panel_count = max(len(fo_columns), 1)
    plt.figure(figsize=[10 * panel_count, 4.8])

    # The figure title concatenates every key column that holds a single
    # distinct value over the whole merged frame.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])
    plt.suptitle(title)

    for index, fo_name in enumerate(fo_columns):
        fo_sorted = np.sort(merged[fo_name].values)
        plt.subplot(1, panel_count, index + 1)
        plt.plot(fo_sorted, ob_sorted)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def contingency_table_multi_mode(ob, fo_list, grade=None, save_path='contingency_table.xls', sheet_name='sheet1'):
    '''
    Write one contingency table per forecast model into an ``.xls`` file.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts.
    Each forecast column is passed to ``yon.table.contingency_table`` with
    the column name used as the sheet name.

    NOTE(review): ``save_path`` and ``sheet_name`` are accepted but not used;
    the output file is always ``<title>.xls`` where the title is derived from
    the data.  This is kept as-is for compatibility -- confirm whether the
    arguments should be honoured.

    :param ob: observation data, a DataFrame
    :param fo_list: forecast data of several models, a list of DataFrames;
        the last column of every frame must have a distinct name and the
        leading space/time columns must be named identically.  The list is
        not modified.
    :param grade: threshold forwarded to ``contingency_table``
    :param save_path: currently ignored (see note above)
    :param sheet_name: currently ignored (see note above)
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob_data = merged.iloc[:, -1].values

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    if ':' in title:
        # Drops the trailing 13 characters -- presumably the seconds and
        # fractional part of a timestamp string; TODO confirm.
        title = title[:-13]
    # The third maketrans argument deletes every ASCII punctuation character,
    # which makes the title usable as a file name.
    title = title.translate(str.maketrans(':', ':', string.punctuation))
    save_path = title + '.xls'

    for fo_name in merged.columns[7:-1]:
        fo_data = merged[fo_name].values
        yon.table.contingency_table(ob_data, fo_data, grade=grade, save_path=save_path,
                                    sheet_name=fo_name)
def box_plot_muti_model(ob, fo_df_list, save_path=None, x_lable='observation', y_lable='forecast', title='box-plot'):
    '''
    Draw a box plot comparing the observation with every forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  The last merged column is read as the
    observation; it is moved to position 7 under the name ``'ob'`` so that
    the observation box is drawn first, followed by one box per forecast
    column.

    NOTE(review): ``x_lable``, ``y_lable`` and ``title`` are accepted but not
    used; the plot title is always rebuilt from the data.  Kept as-is for
    compatibility -- confirm whether the arguments should be honoured.

    :param ob: observation data, a DataFrame
    :param fo_df_list: forecast data, a list of DataFrames.  The list is not
        modified.
    :param save_path: file the figure is saved to; when ``None`` the figure
        is shown instead
    :param x_lable: x-axis label (currently ignored)
    :param y_lable: y-axis label (currently ignored)
    :param title: figure title (currently ignored, see note above)
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    merged = pisd.merge_on_id_and_obTime(list(fo_df_list) + [ob])

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    # Move the observation column from the end to position 7, renamed 'ob'.
    ob_series = merged.iloc[:, -1]
    merged = merged.drop(merged.columns[-1], axis=1)
    merged.insert(7, 'ob', ob_series)

    labels = merged.columns[7:]
    # One 1-D array per box: transpose so each row is one data column.
    box_data = tuple(merged.iloc[:, 7:].values.T)
    plt.boxplot(box_data, labels=labels)
    plt.title(title)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def multi_mode_and_multi_classification_predictive_contingency_table(
        ob, fo_list, grade_list=None, save_path=None):
    '''
    Write a multi-category contingency table for every forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts.
    Each forecast column is passed to
    ``table.multi_category_contingency_table`` with the same output path.

    NOTE(review): a later function in this file is defined with the same
    name and rebinds it, so this definition is shadowed at import time.
    NOTE(review): ``save_path`` is accepted but not used; the output file is
    always ``<title>.xls``.  Kept as-is for compatibility.

    :param ob: observation data, a DataFrame
    :param fo_list: forecast data of several models, a list of DataFrames;
        the last column of every frame must have a distinct name and the
        leading space/time columns must be named identically.  The list is
        not modified.
    :param grade_list: list of category thresholds forwarded to the table
        routine
    :param save_path: currently ignored (see note above)
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    # The former debug dump of the merged frame to 'aa.csv' was removed: it
    # wrote a stray file into the working directory on every call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob_data = merged.iloc[:, -1].values

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    if ':' in title:
        # Drops the trailing 13 characters -- presumably the seconds and
        # fractional part of a timestamp string; TODO confirm.
        title = title[:-13]
    # The third maketrans argument deletes every ASCII punctuation character,
    # which makes the title usable as a file name.
    title = title.translate(str.maketrans(':', ':', string.punctuation))
    save_path = title + '.xls'

    for fo_name in merged.columns[7:-1]:
        fo_data = merged[fo_name].values
        table.multi_category_contingency_table(ob_data, fo_data, grade_list=grade_list,
                                               save_path=save_path)
def reliability_diagrams_muti_model(ob, fo_list, grade_list=None, save_path=None, diagona_color='r',
                                    regression_line_color='g', broken_line_color='b'):
    '''
    Draw one reliability diagram per forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts.
    For every bin ``(clevs[i-1], clevs[i]]`` of forecast values the observed
    relative frequency of ``ob == 1`` is computed and plotted against the
    bin's upper edge, together with a linear regression through those points
    and the ``[0, 1]`` diagonal.

    :param ob: observation data, a DataFrame; the value column is compared
        against ``1`` to detect an event
    :param fo_list: forecast data of several models, a list of DataFrames.
        The list is not modified.
    :param grade_list: bin edges for the forecast values; when ``None`` the
        eleven edges ``0.0, 0.1, ..., 1.0`` are used
    :param save_path: file the figure is saved to; when ``None`` the figure
        is shown instead
    :param diagona_color: colour of the diagonal reference line
    :param regression_line_color: colour of the regression line
    :param broken_line_color: colour of the frequency polyline and markers
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob = merged.iloc[:, -1].values

    # One panel per forecast column; sizing the grid by the merged input
    # count (forecasts + observation) left a trailing empty panel.
    fo_columns = merged.columns[7:-1]
    panel_count = max(len(fo_columns), 1)
    plt.figure(figsize=[6.4 * panel_count, 4.8])

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])
    plt.suptitle(title)

    if grade_list is None:
        # Default bin edges.  The previous ``np.arange(0, 1.0, 10)`` had a
        # step of 10 and therefore produced the single edge ``[0.]``.
        clevs = np.linspace(0.0, 1.0, 11)
    else:
        clevs = np.asarray(grade_list)
    edges_2d = clevs.reshape(-1, 1)  # LinearRegression expects a 2-D feature array

    for index, fo_name in enumerate(fo_columns):
        fo = merged[fo_name].values
        plt.subplot(1, panel_count, index + 1)

        # The first edge has no bin below it; it keeps the fixed value 0.
        orfs = [0.0]
        for i in range(1, len(clevs)):
            in_bin = (fo > clevs[i - 1]) & (fo <= clevs[i])
            count = np.count_nonzero(in_bin)
            if count:
                # Divide by the number of samples in the bin.  ``len`` of the
                # ``np.where`` result is the tuple length (always 1), which
                # turned the frequency into a raw count.
                orfs.append(np.sum(ob[in_bin] == 1) / count)
            else:
                # Empty bin: no frequency is defined, so leave a gap.
                orfs.append(np.nan)
        orfs = np.array(orfs)

        # Fit and draw only the bins that actually hold samples.
        valid = ~np.isnan(orfs)
        model = LinearRegression().fit(edges_2d[valid], orfs[valid])
        fitted = model.predict(edges_2d)

        plt.plot(edges_2d, fitted, color=regression_line_color)
        plt.plot(clevs[valid], orfs[valid], color=broken_line_color)
        plt.scatter(clevs[valid], orfs[valid], color=broken_line_color)
        plt.plot([0, 1], [0, 1], color=diagona_color)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def scatter_regress_muti_model(ob, fo_df_list, save_path=None, scattercolor='r', scattersize=5, x_label='fo',
                               y_label='bo', fontsize=10, line_color='r'):
    '''
    Draw one observation/forecast scatter plot with a regression line per model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts.
    Observations are drawn on the x axis and forecasts on the y axis; the
    regression predicts the forecast from the observation.

    NOTE(review): because observations are on the x axis, the default labels
    (``x_label='fo'``, ``y_label='bo'``) look swapped; they are kept because
    they are part of the public signature.

    :param ob: observation data, a DataFrame
    :param fo_df_list: forecast data of several models, a list of DataFrames;
        the last column of every frame must have a distinct name and the
        leading space/time columns must be named identically.  The list is
        not modified.
    :param save_path: file the figure is saved to; when ``None`` the figure
        is shown instead
    :param scattercolor: marker face colour
    :param scattersize: marker size
    :param x_label: x-axis label
    :param y_label: y-axis label
    :param fontsize: font size of both axis labels
    :param line_color: format/colour of the regression line
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    merged = pisd.merge_on_id_and_obTime(list(fo_df_list) + [ob])
    ob = merged.iloc[:, -1].values

    # One panel per forecast column; sizing the grid by the merged input
    # count (forecasts + observation) left a trailing empty panel.
    fo_columns = merged.columns[7:-1]
    panel_count = max(len(fo_columns), 1)
    plt.figure(figsize=[6.4 * panel_count, 4.8])

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])
    plt.suptitle(title)

    # Regression inputs depend only on the observation: build them once.
    ob_feature = np.asarray(ob, dtype=float).reshape(-1, 1)
    # Sample the line over the observed range, always including 0.  The old
    # ``np.arange(0, max, max / 30)`` had a zero step when the maximum was 0
    # and ignored negative observations whenever the maximum was positive.
    ob_line = np.linspace(min(0, np.min(ob)), max(0, np.max(ob)), 31)
    line_feature = ob_line.reshape(-1, 1)

    for index, fo_name in enumerate(fo_columns):
        fo = merged[fo_name].values
        plt.subplot(1, panel_count, index + 1)
        plt.plot(ob, fo, 'o', markerfacecolor=scattercolor, markersize=scattersize)

        combined = np.hstack((ob, fo))
        num_max = combined.max()
        num_min = combined.min()

        clf = LinearRegression().fit(ob_feature, fo)
        fo_rg = clf.predict(line_feature)
        plt.plot(ob_line, fo_rg, line_color)

        # Pad by 20 % of the magnitude, always outwards.  Without ``abs`` a
        # negative extreme moved the limit inwards and clipped the data.
        plt.xlim(num_min - abs(num_min) / 5, num_max + abs(num_max) / 5)
        plt.xlabel(x_label, size=fontsize)
        plt.ylabel(y_label, size=fontsize)
        plt.title(fo_name)

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def multi_mode_and_multi_classification_predictive_contingency_table(ob, fo_list, grade_list=None, save_path=None):
    '''
    Write one multi-category contingency table sheet per forecast model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts.
    All tables go into a single ``<title>.xlsx`` workbook, one sheet per
    forecast column, where the title is derived from the data.

    :param ob: observation data, a DataFrame
    :param fo_list: forecast data of several models, a list of DataFrames;
        the last column of every frame must have a distinct name and the
        leading space/time columns must be named identically.  The list is
        not modified.
    :param grade_list: list of category thresholds forwarded to the table
        routine
    :param save_path: directory the workbook is written to; when ``None``
        the workbook is written relative to the current directory
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob_data = merged.iloc[:, -1].values

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    if ':' in title:
        # Drops the trailing 13 characters -- presumably the seconds and
        # fractional part of a timestamp string; TODO confirm.
        title = title[:-13]
    # The third maketrans argument deletes every ASCII punctuation character,
    # which makes the title usable as a file name.
    title = title.translate(str.maketrans(':', ':', string.punctuation))

    if save_path is None:
        save_path = title + '.xlsx'
    else:
        save_path = save_path + '/' + title + '.xlsx'

    # Create the file up front, as before, so a bad path fails early.
    pathlib.Path(save_path).touch()

    # The context manager closes the writer even when a table call raises;
    # previously an exception left the workbook handle open.
    with pd.ExcelWriter(save_path) as writer:
        for fo_name in merged.columns[7:-1]:
            fo_data = merged[fo_name].values
            table.multi_category_contingency_table(ob_data, fo_data, grade_list=grade_list,
                                                   is_append_sheet=True, excel_writer=writer,
                                                   sheet_name=fo_name)
def frequency_histogram_muti_model(ob, fo_list, clevs, x_lable='frequency', save_path=None, y_lable='range',
                                   left_label='Obs', right_label='Pred', left_color='r', right_color='b',
                                   legend_location="upper right", width=0.2):
    '''
    Compare observed and forecast occurrence frequencies, one panel per model.

    The forecast frames and the observation frame are merged with
    ``pisd.merge_on_id_and_obTime``.  In the merged frame the last column is
    read as the observation and columns ``7:-1`` are read as the forecasts.
    Values are counted in the half-open bins ``[clevs[i], clevs[i+1])`` plus
    a final open-ended bin ``>= clevs[-1]``, and the counts are divided by
    the number of samples.

    :param ob: observation data, a DataFrame
    :param fo_list: forecast data of several models, a list of DataFrames;
        the last column of every frame must have a distinct name and the
        leading space/time columns must be named identically.  The list is
        not modified.
    :param clevs: list of bin edges
    :param x_lable: x-axis label
    :param save_path: file the figure is saved to; when ``None`` the figure
        is shown instead
    :param y_lable: y-axis label
    :param left_label: legend label of the observation bars
    :param right_label: legend label of the forecast bars
    :param left_color: colour of the observation bars
    :param right_color: colour of the forecast bars
    :param legend_location: legend position passed to ``Axes.legend``
    :param width: bar width
    :return: None
    '''
    # Merge a copy so the caller's list does not grow by one frame per call.
    merged = pisd.merge_on_id_and_obTime(list(fo_list) + [ob])
    ob = merged.iloc[:, -1].values

    fo_columns = merged.columns[7:-1]
    panel_count = max(len(fo_columns), 1)
    # ``squeeze=False`` always returns a 2-D array of Axes.  With the default,
    # a single model yields a bare Axes object and indexing it fails.
    fig, axs = plt.subplots(1, panel_count, figsize=(4 * panel_count, 4), squeeze=False)
    plt.subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=0.88, wspace=0.2, hspace=0.2)

    # The title concatenates every key column holding a single distinct value.
    title = ''
    for key in ('level', 'id', 'time'):
        distinct = merged[key].unique()
        if len(distinct) == 1:
            title += str(distinct[0])

    def _bin_fractions(values):
        # Fraction of ``values`` in every [lo, hi) bin plus the open last bin.
        total = len(values)
        fractions = [
            np.count_nonzero((values >= lo) & (values < hi)) / total
            for lo, hi in zip(clevs[:-1], clevs[1:])
        ]
        fractions.append(np.count_nonzero(values >= clevs[-1]) / total)
        return fractions

    # Tick labels and the observed frequencies do not depend on the model.
    xticklabels = [str(lo) + '-' + str(hi) for lo, hi in zip(clevs[:-1], clevs[1:])]
    xticklabels.append('>=' + str(clevs[-1]))
    p_ob = _bin_fractions(ob)
    x = np.arange(0, len(p_ob))

    for index, fo_name in enumerate(fo_columns):
        p_fo = _bin_fractions(merged[fo_name].values)
        axe1 = axs[0, index]

        axe1.bar(x + 0.1, p_ob, width=width, facecolor=left_color, label=left_label)
        axe1.bar(x - 0.1, p_fo, width=width, facecolor=right_color, label=right_label)
        # Honour the documented ``legend_location`` argument (was ignored).
        axe1.legend(loc=legend_location)
        axe1.set_xlabel(x_lable, fontsize=10)
        axe1.set_xticks(x)
        axe1.set_xticklabels(xticklabels, fontsize=9)
        axe1.set_ylabel(y_lable, fontsize=10)
        axe1.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(100))
        axe1.set_title(fo_name)

    plt.suptitle(title)
    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)