コード例 #1
0
ファイル: sha_test.py プロジェクト: mmmy/research_block
def stats_codes_distribution(hash_list):
    """Return summary statistics for the symbol distribution of the hashes.

    Every element of ``hash_list`` is split into a list of its items, the
    split hashes are passed to ``create_hist_pd`` and the table it returns
    is handed to ``stats_data``, whose result is returned unchanged.
    """
    # Break each hash into its individual symbols.
    split_hashes = list(map(list, hash_list))
    # Frequency distribution of every position; only the table form of the
    # histogram is needed here, the first return value is discarded.
    _, position_hist = create_hist_pd(split_hashes)
    # Summary statistics (min, max, median, chi-square test, ...); per the
    # original note the detailed results are in the generated csv.
    return stats_data(position_hist)
コード例 #2
0
def _stats_title(heading, stats_row):
    """Return ``heading`` followed by the formatted summary statistics."""
    # NOTE(review): the 16 spaces before "mean=" reproduce the original
    # literal, where a backslash line continuation inside the string pulled
    # the source indentation into the title. Kept so the rendered figure is
    # unchanged; drop them if the shifted second title line is unintended.
    template = ('\n' + ' ' * 16 + 'mean={}, std={}\n'
                'min={}, 25%={}, 75%={}, max={}\n'
                'chi={}, pvalue={}')
    # Only the template is formatted; ``heading`` is concatenated verbatim so
    # braces in the caller-supplied title are never interpreted.
    return heading + template.format(
        stats_row['mean'], stats_row['std'], stats_row['min'],
        stats_row['25%'], stats_row['75%'], stats_row['max'],
        stats_row['chi'], stats_row['pvalue'])


def plt_two_color_ball_stats(df, title, file_prefix):
    """Write distribution statistics and bar charts for a two-colour-ball draw table.

    Args:
        df: DataFrame providing the columns ``r1`` .. ``r6`` (red balls) and
            ``b1`` (blue ball).
        title: Text prepended to the title of both subplots.
        file_prefix: Prefix of the three generated file names.

    Side effects:
        Creates ``./temp/`` if it does not exist and writes
        ``<prefix>_two_color_ball_red_distribution.csv``,
        ``<prefix>_two_color_ball_blue_distribution.csv`` and
        ``<prefix>_two_color_ball_stats.jpg`` into it, printing one line per
        saved file.

    Returns:
        None.
    """
    # Local import so the function does not depend on a module-level import.
    import os

    out_dir = './temp/'
    # to_csv / savefig do not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    # --- red balls: counts over the six red columns -------------------------
    red_ball_file = '{}_two_color_ball_red_distribution.csv'.format(
        file_prefix)
    df_red_ball = df[['r1', 'r2', 'r3', 'r4', 'r5', 'r6']]
    _, df_red_ball_hist = create_hist_pd(df_red_ball)
    # Sum every column of the histogram table.
    df_red_ball_hist_sum = df_red_ball_hist.apply(np.sum)
    df_red_ball_hist_stats = stats_data(pd.DataFrame(df_red_ball_hist_sum).T)
    df_red_ball_hist_stats.to_csv(out_dir + red_ball_file)
    red_r0 = df_red_ball_hist_stats.iloc[0]
    print(red_ball_file, '保存到temp/')

    # --- blue ball: value counts of the single blue column ------------------
    blue_ball_file = '{}_two_color_ball_blue_distribution.csv'.format(
        file_prefix)
    df_blue_ball = df['b1']
    df_blue_ball_hist = pd.DataFrame(
        df_blue_ball.value_counts().sort_index(axis=0)).T
    df_blue_ball_hist_sum = df_blue_ball_hist.apply(np.sum)
    df_blue_ball_hist_stats = stats_data(pd.DataFrame(df_blue_ball_hist_sum).T)
    df_blue_ball_hist_stats.to_csv(out_dir + blue_ball_file)
    blue_r0 = df_blue_ball_hist_stats.iloc[0]
    print(blue_ball_file, '保存到temp/')

    # --- figure: red distribution on top, blue below ------------------------
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 15))
    fig.subplots_adjust(left=0.1, bottom=0.05, right=0.95, top=0.93)

    ax1.grid(True)
    ax1.bar(df_red_ball_hist_sum.index, df_red_ball_hist_sum.values)
    ax1.set_title(_stats_title(title + ' - 红球(33选6)', red_r0),
                  fontproperties="SimHei", fontsize=14, loc="left")

    ax2.grid(True)
    ax2.bar(df_blue_ball_hist_sum.index, df_blue_ball_hist_sum.values)
    ax2.set_title(_stats_title(title + ' - 蓝球(16选1)', blue_r0),
                  fontproperties="SimHei", fontsize=14, loc="left")

    img_file = file_prefix + '_two_color_ball_stats.jpg'
    fig.savefig(out_dir + img_file)
    print(img_file, '保存到temp/')
import hashlib
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.stats import kstest

from uniform_random_blocks import blocks
from util import hash_n, create_hist_pd, stats_data

# Script: statistics and bar chart for the symbol distribution over all
# hashes in ``blocks.hash``. Needs ``np``, ``pd`` and ``os`` in scope.
csv_file_name = 'uniform_random_548496_hash_all_codes_distribution.csv'
img_file_name = 'uniform_random_548496_hash_all_codes_distribution.jpg'

hash_list = blocks.hash

# Split every hash into its individual symbols.
codes = [list(h) for h in hash_list]
# Frequency distribution of each position.
codes_hist, codes_hist_pd = create_hist_pd(codes)
# Column-wise totals of the histogram table.
codes_hist_sum = codes_hist_pd.apply(np.sum)
# Summary statistics (min, max, median, chi-square test, ...); see the
# generated csv for the detailed results.
codes_hist_sum_stats = stats_data(pd.DataFrame(codes_hist_sum).T)
# to_csv does not create missing parent directories, so make sure the
# output directory exists before writing.
os.makedirs('./temp/', exist_ok=True)
codes_hist_sum_stats.to_csv('./temp/' + csv_file_name)
print(csv_file_name, '保存到temp/')
# Alternative per-row chi-square computations, kept for reference:
# result = codes_hist_pd.apply(lambda x:stats.chi2_contingency([x, [34281]*16]), axis=1)
# result = codes_hist_pd.apply(lambda x:stats.chisquare(x), axis=1)

# print(codes_hist_pd)
r0 = codes_hist_sum_stats.iloc[0]
# Draw the bar chart of the totals.
plt.figure(figsize=(8,5))
plt.subplots_adjust(left=0.1, bottom=0.05, right=0.95, top=0.85)
plt.bar(codes_hist_sum.index, codes_hist_sum.values)
コード例 #4
0
def get_hist_pd_stats(start, count):
    """Return ``stats_data`` for the codes derived from every hash.

    ``union_code(h, start, count)`` is applied to each entry of the
    module-level ``hash_list``; the resulting codes are histogrammed with
    ``create_hist_pd`` and summarised with ``stats_data``.
    NOTE(review): presumably ``union_code`` combines ``count`` symbols
    beginning at position ``start`` -- confirm against its definition.

    Returns:
        The result of ``stats_data``, or ``None`` when there is nothing to
        analyse (no hashes at all, or the first derived code is empty).
    """
    codes_product = [union_code(h, start, count) for h in hash_list]
    # Guard the empty list first: indexing [0] on it would raise IndexError.
    # The length test is kept (instead of truthiness) because the type
    # returned by union_code is not visible here.
    if not codes_product or len(codes_product[0]) == 0:
        return None
    # Only the table form of the histogram is needed.
    _, codes_product_hist_pd = create_hist_pd(codes_product)
    return stats_data(codes_product_hist_pd)