예제 #1
0
def comparing_stat_patch_list(pred, y):
    """Build legend patches comparing summary statistics of ``pred`` and ``y``.

    For each input a patch is created for the mean, standard deviation,
    skewness and kurtosis (red for ``pred``, blue for ``y``), followed by a
    single black patch carrying ``stats.rmse(pred, y)``.

    Args:
        pred: values passed to ``np.mean``/``np.std``/``stats.skewness``/
            ``stats.kurtosis`` and labelled in red.
        y: values summarised the same way and labelled in blue.

    Returns:
        A list of nine ``mpatches.Patch`` objects: the four ``y`` patches,
        then the four ``pred`` patches, then the RMSE patch.
    """
    # (label prefix, format string, statistic) in display order.
    summary_spec = (
        ('mean:', '%03.6f', np.mean),
        ('std:', '%03.6f', np.std),
        ('skewness:', '%03.3f', stats.skewness),
        ('kurtosis:', '%03.3f', stats.kurtosis),
    )

    def _summary_patches(values, colour):
        return [
            mpatches.Patch(color=colour, label=prefix + (fmt % stat(values)))
            for prefix, fmt, stat in summary_spec
        ]

    # pred is summarised first (as before), but y is listed first in the result.
    pred_patches = _summary_patches(pred, 'red')
    y_patches = _summary_patches(y, 'blue')
    rmse_patch = mpatches.Patch(
        color='black', label='RMSE:' + ('%03.6f' % stats.rmse(pred, y)))
    return y_patches + pred_patches + [rmse_patch]
예제 #2
0
def data_description(index, start, end):
    """Print descriptive statistics of a return series and plot its histogram.

    The series is obtained from ``download_data.get_returns(index, start,
    end)``. Central-tendency, dispersion and shape statistics are printed,
    followed by the result of ``st.kstest(returns, 'norm')`` and a
    ``sns.distplot`` histogram.

    Args:
        index: forwarded to ``download_data.get_returns``.
        start: forwarded to ``download_data.get_returns``.
        end: forwarded to ``download_data.get_returns``.

    Returns:
        None. All output goes to stdout and to the plot window.
    """
    returns = download_data.get_returns(index, start, end)

    # Compute each reused statistic once.
    mean = np.mean(returns)
    std = np.std(returns)
    highest = np.max(returns)
    lowest = np.min(returns)
    lower_quartile = sts.quantile(returns, p=0.25)
    upper_quartile = sts.quantile(returns, p=0.75)

    # Measures of central tendency.
    print('个数:', len(returns))
    print('平均值:', mean)
    print('中位数:', np.median(returns))
    # Fix: the "upper quartile" label used to print the p=0.25 quantile and
    # the "lower quartile" label the p=0.75 quantile.
    print('上四分位数', upper_quartile)
    print('下四分位数', lower_quartile)
    # Measures of dispersion.
    print('最大值:', highest)
    print('最小值:', lowest)
    print('极差:', highest - lowest)
    print('四分位差', upper_quartile - lower_quartile)
    print('标准差:', std)
    print('方差:', np.var(returns))
    print('离散系数:', std / mean)
    # Measures of skewness and kurtosis.
    print('偏度:', sts.skewness(returns))
    print('峰度:', sts.kurtosis(returns))
    print(st.kstest(returns, 'norm'))

    sns.distplot(returns, bins=100, label='Empirical')
    # NOTE(review): `sns.plt` is only available in old seaborn releases;
    # confirm the pinned version before upgrading.
    sns.plt.legend()
    sns.plt.title('Empirical')
    sns.plt.show()
예제 #3
0
def get_seq_feature(seq, seq_name, user_id):
    """Summarise a numeric sequence as a one-row feature DataFrame.

    Args:
        seq: sequence of numbers (list or NumPy array). ``None`` or an empty
            sequence yields no features.
        seq_name: prefix used for every generated column name.
        user_id: value stored in the ``user_id`` column.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``<seq_name>_mean``,
        ``_median``, ``_max``, ``_min``, ``_var``, ``_std``, ``_discrete``
        (std / mean, NaN when the mean is 0), ``_skew``, ``_kurt`` and
        ``user_id``; or ``None`` when ``seq`` is empty.
    """
    # `not seq` raises for multi-element NumPy arrays, so test the length.
    if seq is None or len(seq) == 0:
        print('seq is empty! : %s' % seq_name)
        return None

    mean = np.mean(seq)
    std = np.std(seq)

    df = pd.DataFrame()
    df[seq_name + '_mean'] = [mean]
    df[seq_name + '_median'] = [np.median(seq)]
    df[seq_name + '_max'] = [np.max(seq)]
    df[seq_name + '_min'] = [np.min(seq)]
    df[seq_name + '_var'] = [np.var(seq)]
    df[seq_name + '_std'] = [std]
    # Coefficient of variation; undefined for a zero mean.
    # `np.nan` replaces the `np.NaN` alias that NumPy 2.0 removed.
    df[seq_name + '_discrete'] = [std / mean if mean != 0 else np.nan]

    # Skewness/kurtosis are best-effort: any failure in the helper becomes
    # NaN. `except Exception` keeps that, but no longer swallows
    # KeyboardInterrupt/SystemExit as the bare `except:` did.
    try:
        skew = sts.skewness(seq)
    except Exception:
        skew = np.nan
    df[seq_name + '_skew'] = [skew]

    try:
        kurt = sts.kurtosis(seq)
    except Exception:
        kurt = np.nan
    df[seq_name + '_kurt'] = [kurt]

    df['user_id'] = [user_id]
    return df
예제 #4
0
def print_stats(proofs, sizes, search_sizes, total, successes, timeouts):
    """Print success/timeout rates and summary statistics of ``sizes``.

    Args:
        proofs: not read by this function.
        sizes: collection of numbers to summarise.
        search_sizes: not read by this function.
        total: denominator for the success and timeout percentages.
        successes: number of successes.
        timeouts: number of timeouts.

    Returns:
        ``successes / len(implications)``.

    NOTE(review): ``implications`` and ``ABBREV`` are not defined inside this
    function; presumably they are module-level names -- confirm.
    """
    success_pct = int(100 * (successes / total))
    print(f'Successes: {successes}/{total}: {success_pct}%')
    timeout_pct = int(100 * (timeouts / total))
    print(f'Timeouts: {timeouts}/{total}: {timeout_pct}%')

    mean = statistics.mean(sizes)
    median = statistics.median_low(sizes)
    most_common = collections.Counter(sizes).most_common(3)
    longest = max(sizes)
    shortest = min(sizes)
    skewness = stats.skewness(sizes, mean)
    variance = statistics.variance(sizes, mean)
    excess_kurtosis = stats.excess_kurtosis(sizes, mean)

    if not ABBREV:
        # Verbose report: one "name: value" line per statistic.
        report = (
            ('mean', mean),
            ('median', median),
            ('most_common', most_common),
            ('shortest', shortest),
            ('longest', longest),
            ('skewness', skewness),
            ('variance', variance),
            ('excess kurtosis', excess_kurtosis),
        )
        for caption, value in report:
            print(f'{caption}: {value}')
    else:
        # Abbreviated report: everything on one comma-separated line.
        print(f'{mean:.2f}, {median}, {most_common},', end=' ')
        print(f'{skewness:.2f}, {variance:.2f}, {excess_kurtosis:.2f}')

    return successes / len(implications)
예제 #5
0
def get_seq_feature(seq, seq_name, user_id):
    """Summarise a numeric sequence as a one-row DataFrame of 11 features.

    Args:
        seq: sequence of numbers (list or NumPy array). ``None`` or an empty
            sequence yields no features.
        seq_name: prefix used for every generated column name.
        user_id: value stored in the ``user_id`` column.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``<seq_name>_mean``,
        ``_median``, ``_max``, ``_min``, ``_var``, ``_std``, ``_upquantile``,
        ``_downquantile``, ``_discrete`` (std / mean, NaN when the mean is
        0), ``<seq_name>skew``, ``<seq_name>kurt`` and ``user_id``; or
        ``None`` when ``seq`` is empty.
    """
    # `not seq` raises for multi-element NumPy arrays and treats a
    # one-element array holding 0 as empty, so test the length instead.
    if seq is None or len(seq) == 0:
        print('seq is empty!')
        return None

    mean = np.mean(seq)
    std = np.std(seq)

    df = pd.DataFrame()
    df[seq_name + '_mean'] = [mean]
    df[seq_name + '_median'] = [np.median(seq)]
    df[seq_name + '_max'] = [np.max(seq)]
    df[seq_name + '_min'] = [np.min(seq)]
    df[seq_name + '_var'] = [np.var(seq)]
    df[seq_name + '_std'] = [std]

    if len(seq) == 1:
        # A single observation: the value itself is used as the upper
        # quantile and 0 as the lower one, without calling sts.quantile.
        df[seq_name + '_upquantile'] = [seq[0]]
        df[seq_name + '_downquantile'] = [0]
    else:
        df[seq_name + '_upquantile'] = [sts.quantile(seq, p=0.75)]
        df[seq_name + '_downquantile'] = [sts.quantile(seq, p=0.25)]

    # Coefficient of variation; undefined for a zero mean.
    # `np.nan` replaces the `np.NaN` alias that NumPy 2.0 removed.
    df[seq_name + '_discrete'] = [std / mean if mean != 0 else np.nan]

    # Skewness/kurtosis are best-effort: any failure in the helper becomes
    # NaN. The column names deliberately keep their original spelling
    # (no underscore) so existing consumers keep working.
    try:
        skew = sts.skewness(seq)
    except Exception:
        skew = np.nan
    df[seq_name + 'skew'] = [skew]

    try:
        kurt = sts.kurtosis(seq)
    except Exception:
        kurt = np.nan
    df[seq_name + 'kurt'] = [kurt]

    df['user_id'] = [user_id]
    return df
예제 #6
0
def extend_feature(scores):
    """Append summary statistics of ``scores`` to the list itself.

    Every statistic is computed from a snapshot of the original values.
    Previously ``features`` was an alias of ``scores``, so each statistic
    was computed over a list that already contained the earlier appended
    statistics (the mean included the sum, and so on).

    Args:
        scores: list of raw sliding-window features. It is extended in
            place, as before.

    Returns:
        The same list object, extended with 13 values in this order: sum,
        mean, median, p=0.25 quantile, p=0.75 quantile, max, min, range,
        inter-quartile range, variance, coefficient of variation (std /
        mean), skewness, kurtosis.
    """
    base = list(scores)  # snapshot: statistics must not see appended values

    mean = np.mean(base)
    highest = np.max(base)
    lowest = np.min(base)
    lower_quartile = sts.quantile(base, p=0.25)
    upper_quartile = sts.quantile(base, p=0.75)

    extras = [
        np.sum(base),  # sum
        mean,  # mean
        np.median(base),  # median
        lower_quartile,  # p=0.25 quantile
        upper_quartile,  # p=0.75 quantile
        highest,  # maximum
        lowest,  # minimum
        highest - lowest,  # range
        upper_quartile - lower_quartile,  # inter-quartile range
        np.var(base),  # variance
        np.std(base) / mean,  # coefficient of variation
        sts.skewness(base),  # skewness
        sts.kurtosis(base),  # kurtosis
    ]

    # Keep the historical contract: the caller's list is mutated and returned.
    scores.extend(extras)
    return scores
예제 #7
0
def stat_patch_list(x):
    """Build legend patches describing ``x``.

    Args:
        x: object exposing ``.size`` and accepted by ``np.mean``, ``np.std``,
            ``stats.skewness`` and ``stats.kurtosis``.

    Returns:
        A list of five ``mpatches.Patch`` objects labelled with the element
        count, mean, standard deviation, skewness and kurtosis, in that
        order.
    """
    patches = [mpatches.Patch(label='data num:' + str(x.size))]
    # (label prefix, format string, statistic) in display order.
    summary_spec = (
        ('mean:', '%03.6f', np.mean),
        ('std:', '%03.6f', np.std),
        ('skewness:', '%03.3f', stats.skewness),
        ('kurtosis:', '%03.3f', stats.kurtosis),
    )
    for prefix, fmt, stat in summary_spec:
        patches.append(mpatches.Patch(label=prefix + (fmt % stat(x))))
    return patches
예제 #8
0
def kusk(t_win, stockdata, indexes):
    """Compute sliding-window kurtosis, skewness, volatility and return.

    For every offset ``j`` in ``indexes`` the window
    ``stockdata.iloc[-j - 2 * t_win:-j - t_win]`` is summarised.

    :param t_win: window length
    :param stockdata: stock series (positional slicing via ``.iloc``)
    :param indexes: offsets to iterate over
    :return: DataFrame with the columns "时间" (time), "峰度" (kurtosis),
        "偏度" (skewness), "波动率" (volatility, ``np.std`` of the window)
        and "收益率" (return, ``(last - first) / first`` of the window)
    """
    sk = []   # skewness per window
    ku = []   # kurtosis per window
    std = []  # standard deviation per window
    res = []  # simple return per window
    for j in indexes:
        s = stockdata.iloc[-j - 2 * t_win:-j - t_win]
        print("s_length: ", len(s))
        sk.append(sts.skewness(s))
        ku.append(sts.kurtosis(s))
        std.append(np.std(s))
        res.append((s.iloc[-1] - s.iloc[0]) / s.iloc[0])

    # Fix: the kurtosis column used to receive the skewness list and the
    # skewness column the kurtosis list.
    # NOTE(review): the time column is taken from the *last* window only
    # (`s.date`) and keeps that window's index, which may not line up with
    # the 0..n-1 index of the other columns -- confirm this is intended.
    df = pd.DataFrame({
        "时间": pd.Series(s.date),
        "峰度": pd.Series(ku),
        "偏度": pd.Series(sk),
        "波动率": pd.Series(std),
        "收益率": pd.Series(res)
    })
    return df
예제 #9
0
파일: util.py 프로젝트: ClorisUcas/Qi_state
def _first_confirmed_index(values, passes):
    """Return the first index whose value and successor both satisfy ``passes``.

    The final element has no successor and qualifies on its own. Returns
    ``None`` when no index qualifies.
    """
    last = len(values) - 1
    for idx, value in enumerate(values):
        if passes(value) and (idx == last or passes(values[idx + 1])):
            return idx
    return None


def diff(file):
    """Write skewness-based cut points for growing prefixes of a series.

    Reads ``../filter1/filter_euclidean/<file>_ee1.txt`` (space-separated;
    the first field of each line is ignored), sums the remaining columns
    over all lines, and takes first differences of the summed columns. For
    every prefix length ``i >= 3`` of that difference series it computes:

    * the skewness of every suffix ``prefix[start:]`` (start = 0..i-3), and
      ``a`` = 1 + the first position where two consecutive values are < 1;
    * the skewness of every head ``prefix[:end]`` (end = 2..i-1), and
      ``b`` = 2 + the first position where two consecutive values are > 1.

    ``a``/``b`` are ``''`` when no such position exists. Results are printed
    and written to ``../filter1/filter_euclidean_skwess/<file>.txt``.

    Changes from the previous version:

    * both files are now closed (and the output flushed) via ``with``;
    * the last element of each skewness list is judged on its own instead
      of raising ``IndexError``. The original ``i == length`` branches were
      unreachable; presumably they were meant to handle exactly this case.

    Args:
        file: base name used to build the input and output paths.
    """
    src_path = '../filter1/filter_euclidean/' + file + '_ee1.txt'
    dst_path = '../filter1/filter_euclidean_skwess/' + file + '.txt'

    with open(src_path, 'r') as f, open(dst_path, 'w') as f_new:
        # Column-wise sums over all lines (field 0 of each line is skipped).
        dicty = []
        for line_raw in f:
            line = line_raw.replace('\n', '').split(' ')
            if len(dicty) == 0:
                dicty = [0 for _ in range(len(line) - 1)]
            for col in range(1, len(line)):
                if dicty[col - 1] == 0:
                    dicty[col - 1] = float(line[col])
                else:
                    dicty[col - 1] += float(line[col])

        # First differences of the summed columns.
        y = [dicty[k + 1] - dicty[k] for k in range(len(dicty) - 1)]

        for i in range(3, len(y)):
            f_new.write(str(i) + '\n')
            y_new = y[:i]
            print(str(i))

            # Skewness of every suffix that has at least three elements.
            suffix_skews = [
                sts.skewness(y_new[start:]) for start in range(len(y_new) - 2)
            ]
            print(suffix_skews)
            hit = _first_confirmed_index(suffix_skews, lambda v: v < 1)
            a = '' if hit is None else hit + 1

            # Skewness of every head of length 2 .. len(y_new) - 1.
            prefix_skews = [
                sts.skewness(y_new[:end]) for end in range(2, len(y_new))
            ]
            print(prefix_skews)
            hit = _first_confirmed_index(prefix_skews, lambda v: v > 1)
            b = '' if hit is None else hit + 2

            print(file + ' [' + str(a) + ' , ' + str(b) + ']')

            f_new.write(str(suffix_skews) + '\n')
            f_new.write(str(prefix_skews) + '\n')
            f_new.write(' [' + str(a) + ' , ' + str(b) + ']' + '\n')
예제 #10
0
import numpy as np
import stats as sts
# Descriptive statistics for a fixed sample of 40 scores.
a = [31, 24, 23, 25, 14, 25, 13, 12, 14, 23,
          32, 34, 43, 41, 21, 23, 26, 26, 34, 42,
          43, 25, 24, 23, 24, 44, 23, 14, 52,32,
          42, 44, 35, 28, 17, 21, 32, 42, 12, 34]
scores=np.array(a)
# Central tendency.
print('總合為:',np.sum(scores))
print('筆數為:',len(scores))
print('平均值為:',np.mean(scores))
print('中位數為:',np.median(scores))
print('眾數為:',sts.mode(scores))
# Fix: the "upper quartile" label used to print the p=0.25 quantile and the
# "lower quartile" label the p=0.75 quantile.
print('上四分位數為',sts.quantile(scores,p=0.75))
print('下四分位數為',sts.quantile(scores,p=0.25))
# Dispersion.
print('最大值:',np.max(scores))
print('最小值:',np.min(scores))
print('全距:',np.ptp(scores))
print('標準差:',np.std(scores))
print('變異數:',np.var(scores))
print('離散係數:',np.std(scores)/np.mean(scores))
# Shape.
print('偏態係數:',sts.skewness(scores))
print('峰態係數:',sts.kurtosis(scores))
예제 #11
0
import numpy as np
import stats as sts
# Descriptive statistics for a fixed sample of 40 scores.
scares = [
    31, 24, 23, 25, 14, 25, 13, 12, 14, 23, 32, 34, 43, 41, 21, 23, 26, 26, 34,
    42, 43, 25, 24, 23, 24, 44, 23, 14, 52, 32, 42, 44, 35, 28, 17, 21, 32, 42,
    12, 34
]

# Central tendency.
print('求和:', np.sum(scares))
print('個數:', len(scares))
print('平均值:', np.mean(scares))
print('中位數:', np.median(scares))
print('眾數:', sts.mode(scares))
# Fix: the "upper quartile" label used to print the p=0.25 quantile and the
# "lower quartile" label the p=0.75 quantile.
print('上四分位數:', sts.quantile(scares, p=0.75))
print('下四分位數:', sts.quantile(scares, p=0.25))

# Dispersion.
print('最大值:', np.max(scares))
print('最小值:', np.min(scares))
# Fix: the range line used to print the standard deviation.
print('極差:', np.max(scares) - np.min(scares))
print('四分位數:', sts.quantile(scares, p=0.75), sts.quantile(scares, p=0.25))
print('標準差:', np.std(scares))
print('方差', np.var(scares))
print('離散係數', np.std(scares) / np.mean(scares))

# Shape. Fix: the skewness label was misspelled ('遍度').
print('偏度:', sts.skewness(scares))
print('峰度:', sts.kurtosis(scares))
예제 #12
0
# Dispersion and shape statistics of the '身高' column of `df`
# (`df` is defined outside the visible part of this script).
height = df['身高']
print('方差', height.var())
print('标准差', height.std())
print('极差', height.max() - height.min())

print('偏度', height.skew())
print('峰度', height.kurt())

import numpy as np
import stats as sts
scores = [1, 2, 2, 2, 5]
# Measures of central tendency.
print('求和:', np.sum(scores))
print('个数:', len(scores))
print('平均值:', np.mean(scores))
print('中位数:', np.median(scores))
print('众数:', sts.mode(scores))
# Fix: the "upper quartile" label used to print the p=0.25 quantile and the
# "lower quartile" label the p=0.75 quantile.
print('上四分位数', sts.quantile(scores, p=0.75))
print('下四分位数', sts.quantile(scores, p=0.25))
# Measures of dispersion.
print('最大值:', np.max(scores))
print('最小值:', np.min(scores))
print('极差:', np.max(scores) - np.min(scores))
print('四分位差', sts.quantile(scores, p=0.75) - sts.quantile(scores, p=0.25))
print('标准差:', np.std(scores))
print('方差:', np.var(scores))
print('离散系数:', np.std(scores) / np.mean(scores))
# Measures of skewness and kurtosis.
print('偏度:', sts.skewness(scores))
print('峰度:', sts.kurtosis(scores))
예제 #13
0
# Measures of central tendency for `data` (defined outside the visible part
# of this script).
print('个数:', len(data))
print('平均值:', np.mean(data))
print('中位数:', np.median(data))
print('众数:', sts.mode(data))
# Fix: the "upper quartile" label used to print the p=0.25 quantile and the
# "lower quartile" label the p=0.75 quantile.
print('上四分位数', sts.quantile(data, p=0.75))
print('下四分位数', sts.quantile(data, p=0.25))
# Measures of dispersion.
print('最大值:', np.max(data))
print('最小值:', np.min(data))
print('极差:', np.max(data) - np.min(data))
print('四分位差', sts.quantile(data, p=0.75) - sts.quantile(data, p=0.25))
print('标准差:', np.std(data))
print('方差:', np.var(data))
print('变异系数:', np.std(data) / np.mean(data))
# Measures of skewness and kurtosis.
print('偏度:', sts.skewness(data))
print('峰度:', sts.kurtosis(data))

# Draw two random samples of 1000 integers each from [0, 9).
sample_size = 1000
x = np.random.randint(0, 9, size=sample_size)
y = np.random.randint(0, 9, size=sample_size)

# Sample means.
mx, my = np.mean(x), np.mean(y)

# Sample standard deviations (NumPy default, ddof=0).
stdx, stdy = np.std(x), np.std(y)

# Compute the covariance matrix
예제 #14
0
 def skewness(self, data):
     """Print the skewness of ``data`` as computed by ``sts.skewness``."""
     skew_value = sts.skewness(data)
     print('偏度:', skew_value)
예제 #15
0
import pandas as pd
import numpy as np
import stats as sts

# Two 15x5 feature tables, one row per CSV file:
#   col 0 coefficient of variation, col 1 range, col 2 skewness,
#   col 3 kurtosis, col 4 class label (1 for "post", 0 for "pre").
post = np.zeros((15, 5))
pre = np.zeros((15, 5))

for table, prefix, label in ((post, 'data/post/', '1'),
                             (pre, 'data/pre/', '0')):
    for i in range(15):
        path = prefix + str(i + 1) + '.csv'
        # skiprows=1 drops the first line of the file before parsing; only
        # the first 20004 values of column 'V' are analysed.
        V = pd.read_csv(path, skiprows=1)['V'][0:20004]
        table[i, 0] = np.std(V) / np.mean(V)  # coefficient of variation
        table[i, 1] = np.max(V) - np.min(V)  # range
        table[i, 2] = sts.skewness(V)  # skewness
        table[i, 3] = sts.kurtosis(V)  # kurtosis
        # The label is a string; NumPy converts it to float on assignment.
        table[i, 4] = label
예제 #16
0
def Ts_skewness(x, n):
    """Return ``stats.skewness`` of the slice ``x[-n:]``.

    Args:
        x: sliceable sequence of observations.
        n: used as the slice bound ``-n``; note that ``n == 0`` therefore
            selects the whole of ``x``.
    """
    window = x[-n:]
    return stats.skewness(window)
예제 #17
0
# -*- coding: utf-8 -*-
"""
@author: Daniel
@contact: [email protected]
@file: skew_learn.py
@time: 2017/7/25 9:10
"""
import stats
# Skewness of a small fixed sample.
sample = [1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]
test = stats.skewness(sample)
print(test)
# Descriptive statistics for two samples, `list1` and `list2`, computed with
# the `stats` module. NOTE(review): `list1`/`list2` are not defined in the
# visible part of this script -- confirm where they come from.
total1 = stats.total(list1)  
total2 = stats.total(list2)  
mean1 = stats.mean(list1)  
mean2 = stats.mean(list2)  
mode1 = stats.mode(list1)  
mode2 = stats.mode(list2)  
median1 = stats.median(list1)  
median2 = stats.median(list2)  
variance1 = stats.variance(list1)  
variance2 = stats.variance(list2)  
standard_deviation1 = stats.SD(list1)  
standard_deviation2 = stats.SD(list2)  
# Judging by the variable names, the extra positional `True` presumably
# selects the sample (rather than population) estimator -- TODO confirm
# against the `stats` module.
covariance_pop = stats.covariance(list1, list2)  
covariance_sample = stats.covariance(list1, list2, True)  
correlation = stats.correlation(list1, list2)  
skewness_pop1 = stats.skewness(list1)  
skewness_pop2 = stats.skewness(list2)  
skewness_sample1 = stats.skewness(list1, True)  
skewness_sample2 = stats.skewness(list2, True)  
kurtosis_pop1 = stats.kurtosis(list1)  
kurtosis_pop2 = stats.kurtosis(list2)  
kurtosis_sample1 = stats.kurtosis(list1, True)  
kurtosis_sample2 = stats.kurtosis(list2, True)  

# Report the results.
print("Total1:", total1)
print("Total2:", total2)
print("Mean1:", mean1)
print("Mean2", mean2)
print("Mode1:", mode1)
print("Mode2:", mode2)
print("Median1:", median1)