def stats_codes_distribution(hash_list):
    """Compute summary statistics for the per-position distribution of hashes.

    Each item of ``hash_list`` is expanded into a list of its elements, the
    resulting rows are passed to ``create_hist_pd``, and the second value it
    returns is handed to ``stats_data``.

    Args:
        hash_list: Iterable whose items are themselves iterable (presumably
            hash strings -- confirm against the caller).

    Returns:
        The result of ``stats_data`` for the histogram table. According to
        the original notes this holds min, max, median, chi-square test
        results and similar figures.
    """
    # Frequency distribution of every position.
    rows = list(map(list, hash_list))
    _, hist_table = create_hist_pd(rows)

    # Summary statistics: min, max, median, chi-square test, etc.
    return stats_data(hist_table)
def _two_color_ball_title(title, label, row):
    """Return a subplot title: a heading line followed by summary statistics.

    Args:
        title: Heading text supplied by the caller.
        label: Text appended to the heading after ``' - '``.
        row: Mapping-like object indexable by 'mean', 'std', 'min', '25%',
            '75%', 'max', 'chi' and 'pvalue'.
    """
    stat_keys = ('mean', 'std', 'min', '25%', '75%', 'max', 'chi', 'pvalue')
    stats_text = (
        'mean={}, std={}\nmin={}, 25%={}, 75%={}, max={}\nchi={}, pvalue={}'
        .format(*(row[key] for key in stat_keys))
    )
    # The statistics start on their own line directly after the heading; no
    # backslash or padding may leak into the rendered title.
    return title + ' - ' + label + '\n' + stats_text


def plt_two_color_ball_stats(df, title, file_prefix):
    """Save red/blue ball frequency statistics as CSV and plot them as bars.

    For the red balls (columns 'r1'..'r6') the histogram table returned by
    ``create_hist_pd`` is summed with ``np.sum``; for the blue ball (column
    'b1') the value counts are used. Each set of totals is passed through
    ``stats_data``, written to ``./temp/`` as CSV, and drawn as a bar chart
    whose title shows the first row of those statistics. The two charts are
    saved together as one JPG in ``./temp/``.

    Args:
        df: DataFrame providing the columns 'r1'..'r6' and 'b1'.
        title: Heading used at the start of both subplot titles.
        file_prefix: Prefix for the names of the three generated files.

    Returns:
        None. The results are the files written under ``./temp/`` (the
        directory must already exist) and the progress lines printed.
    """
    # Red balls: totals across the six red-ball columns.
    red_ball_file = '{}_two_color_ball_red_distribution.csv'.format(
        file_prefix)
    df_red_ball = df[['r1', 'r2', 'r3', 'r4', 'r5', 'r6']]
    _, df_red_ball_hist = create_hist_pd(df_red_ball)
    df_red_ball_hist_sum = df_red_ball_hist.apply(np.sum)
    df_red_ball_hist_stats = stats_data(pd.DataFrame(df_red_ball_hist_sum).T)
    df_red_ball_hist_stats.to_csv('./temp/' + red_ball_file)
    red_r0 = df_red_ball_hist_stats.iloc[0]
    print(red_ball_file, '保存到temp/')

    # Blue ball: value counts of the single column, ordered by ball number.
    blue_ball_file = '{}_two_color_ball_blue_distribution.csv'.format(
        file_prefix)
    df_blue_ball = df['b1']
    df_blue_ball_hist = pd.DataFrame(
        df_blue_ball.value_counts().sort_index(axis=0)).T
    df_blue_ball_hist_sum = df_blue_ball_hist.apply(np.sum)
    df_blue_ball_hist_stats = stats_data(pd.DataFrame(df_blue_ball_hist_sum).T)
    blue_r0 = df_blue_ball_hist_stats.iloc[0]
    df_blue_ball_hist_stats.to_csv('./temp/' + blue_ball_file)
    print(blue_ball_file, '保存到temp/')

    # One figure, two stacked bar charts (red on top, blue below).
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 15))
    fig.subplots_adjust(left=0.1, bottom=0.05, right=0.95, top=0.93)

    ax1.grid(True)
    ax1.bar(df_red_ball_hist_sum.index, df_red_ball_hist_sum.values)
    ax1.set_title(_two_color_ball_title(title, '红球(33选6)', red_r0),
                  fontproperties="SimHei", fontsize=14, loc="left")

    ax2.grid(True)
    ax2.bar(df_blue_ball_hist_sum.index, df_blue_ball_hist_sum.values)
    ax2.set_title(_two_color_ball_title(title, '蓝球(16选1)', blue_r0),
                  fontproperties="SimHei", fontsize=14, loc="left")

    img_file = file_prefix + '_two_color_ball_stats.jpg'
    fig.savefig('./temp/' + img_file)
    print(img_file, '保存到temp/')
    # NOTE(review): the figure is not closed here, so repeated calls keep
    # every figure registered with pyplot. Confirm no caller displays it
    # afterwards before adding plt.close(fig).
# Script: frequency statistics over the hashes in
# ``uniform_random_blocks.blocks``. The totals are summarised with
# ``stats_data``, written to ./temp/ as CSV, and drawn as a bar chart.
import hashlib
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.stats import kstest

from uniform_random_blocks import blocks
from util import hash_n, create_hist_pd, stats_data

csv_file_name = 'uniform_random_548496_hash_all_codes_distribution.csv'
# NOTE(review): img_file_name is not used in the visible part of the script;
# presumably the chart is saved under this name further down -- confirm.
img_file_name = 'uniform_random_548496_hash_all_codes_distribution.jpg'

# hash_list stays a module-level name: other code in this file reads it.
hash_list = blocks.hash
codes = [list(h) for h in hash_list]

# Frequency distribution of every position.
codes_hist, codes_hist_pd = create_hist_pd(codes)

# Totals.
codes_hist_sum = codes_hist_pd.apply(np.sum)

# Summary statistics (min, max, median, chi-square test, etc.); the detailed
# results are in the generated CSV.
codes_hist_sum_stats = stats_data(pd.DataFrame(codes_hist_sum).T)
codes_hist_sum_stats.to_csv('./temp/' + csv_file_name)
print(csv_file_name, '保存到temp/')

# First row of the statistics table.
r0 = codes_hist_sum_stats.iloc[0]

# Draw the bar chart.
plt.figure(figsize=(8, 5))
plt.subplots_adjust(left=0.1, bottom=0.05, right=0.95, top=0.85)
plt.bar(codes_hist_sum.index, codes_hist_sum.values)
def get_hist_pd_stats(start, count):
    """Return statistics for the codes built from every hash in ``hash_list``.

    ``union_code(h, start, count)`` is evaluated for each item of the
    module-level ``hash_list``; the results are passed to ``create_hist_pd``
    and the second value it returns is handed to ``stats_data``.

    Args:
        start: Forwarded to ``union_code`` (presumably the first position of
            the window to combine -- confirm against ``union_code``).
        count: Forwarded to ``union_code`` (presumably the window length --
            confirm against ``union_code``).

    Returns:
        The result of ``stats_data``, or ``None`` when there is nothing to
        analyse: ``hash_list`` is empty or the first combined code is empty.
    """
    codes_product = [union_code(h, start, count) for h in hash_list]

    # Only the first code is inspected. An empty hash_list is treated like an
    # empty code instead of failing with IndexError on codes_product[0].
    if not codes_product or len(codes_product[0]) == 0:
        return None

    _, codes_product_hist_pd = create_hist_pd(codes_product)
    return stats_data(codes_product_hist_pd)