Example #1
def test_kde():
    """
    Tests for kernel density estimation. To test standard KDE, we use a random
    normal to learn the density, generate 1000 samples, and then check that the
    means/standard deviations/kurtosis figures are similar to within a margin 
    epsilon.
    TODO: Need a test that adjusts the bandwidth factor a.
    """
    eps = 0.1
    kde = KDE()
    
    # Test standard KDE.
    X   = pandas.DataFrame(np.random.multivariate_normal([0, 0], \
                           np.eye(2), (1000, )))
    S   = kde.run(X, n_samples = 1000)
    # Use abs so large negative deviations also fail the tolerance check.
    assert np.abs(np.mean(S.std(0) - X.std(0))) < eps
    assert np.abs(np.mean(S.mean(0) - X.mean(0))) < eps
    assert np.abs(np.mean(kurtosis(S, 0) - kurtosis(X, 0))) < eps

    # Test partitioned KDE
    counts   = {'nGood': 250, 'nCritical': 100, 'nFail': 100}
    columns  = ['A', 'B']
    spec_lims = pandas.DataFrame({columns[0]: np.array([-2.0, 2.0]), \
                                  columns[1]: np.array([-2.0, 2.0])})
    specs  = Specs(specs=spec_lims).gen_crit_region(5.5/6, 6.5/6)
    A   = pandas.DataFrame(np.random.multivariate_normal([0, 0], \
                           np.eye(2), (1000, )), columns=columns)
    S   = kde.run(A, specs=specs, counts=counts)
    assert np.abs(np.mean(S.std(0) - 1.3 * A.std(0))) < eps
    assert np.abs(np.mean(S.mean(0) - A.mean(0))) < eps
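A quick, self-contained aside (not from the original test) on why the kurtosis margin check works: scipy.stats.kurtosis returns *excess* kurtosis (Fisher definition) by default, so large normal samples score near 0.

import numpy as np
from scipy.stats import kurtosis

x = np.random.default_rng(0).normal(size=100_000)
print(kurtosis(x))                # ~0.0 (Fisher / excess kurtosis)
print(kurtosis(x, fisher=False))  # ~3.0 (Pearson definition)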
Example #2
def getdata(data, samplerate=44100):
    # Convert raw samples to a float ndarray so .shape below works.
    data = np.asarray(data, dtype=float)
    sound_list["samplerate"] = samplerate
    sound_list["wavedata"] = data
    sound_list["number_of_samples"] = data.shape[0]
    sound_list["song_length"] = int(sound_list["number_of_samples"] / samplerate)

    def summary(x):
        # Seven summary statistics per feature track.
        return [np.min(x), np.max(x), np.mean(x), np.std(x),
                np.median(x), st.skew(x), st.kurtosis(x)]

    ans = []
    zcr, ts = zero_crossing_rate(data, 1024, sound_list["samplerate"])
    ans += summary(zcr)
    rms, ts = root_mean_square(data, 1024, sound_list["samplerate"])
    ans += summary(rms)
    sc, ts = spectral_centroid(data, 1024, sound_list["samplerate"])
    ans += summary(sc)
    sr, ts = spectral_rolloff(data, 1024, sound_list["samplerate"])
    ans += summary(sr)
    sf, ts = spectral_flux(data, 1024, sound_list["samplerate"])
    ans += summary(sf)
    # `x` is an external MFCC extractor object defined elsewhere;
    # append every MFCC coefficient of every frame.
    x.set_input_data(data)
    for frame in x.MFCCs:
        ans.extend(frame)
    return ans
Example #3
def get_mfcc_features(filename):
    feature_dict = {}
    (rate, sig) = wav.read(filename)

    if sig.ndim == 2:
        # wav is stereo so average over both channels
        try:
            mfcc_feat_chan0 = mfcc(sig[:, 0],
                                   rate,
                                   numcep=15,
                                   appendEnergy=True)
            mfcc_feat_chan1 = mfcc(sig[:, 1],
                                   rate,
                                   numcep=15,
                                   appendEnergy=True)
            mfcc_feat = (mfcc_feat_chan0 + mfcc_feat_chan1) / 2
        except IndexError:
            print('Index error')
            mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    else:
        mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    # Velocity: half the difference between consecutive frames (t minus t+1).
    vel = (mfcc_feat[:-1, :] - mfcc_feat[1:, :]) / 2.0
    # Acceleration: half the difference between consecutive velocity frames.
    acc = (vel[:-1, :] - vel[1:, :]) / 2.0
    mfcc_means = []
    for i in range(0, 14):
        key = "energy" if i == 0 else "mfcc" + str(i)
        # mfcc
        feature_dict[key + "_mean"] = mfcc_feat[:, i].mean()
        feature_dict[key + "_var"] = mfcc_feat[:, i].var()
        feature_dict[key + "_skewness"] = st.skew(mfcc_feat[:, i])
        feature_dict[key + "_kurtosis"] = st.kurtosis(mfcc_feat[:, i])
        # Vel
        feature_dict[key + "_vel_mean"] = vel[:, i].mean()
        feature_dict[key + "_vel_var"] = vel[:, i].var()
        feature_dict[key + "_vel_skewness"] = st.skew(vel[:, i])
        feature_dict[key + "_vel_kurtosis"] = st.kurtosis(vel[:, i])
        # Accel
        feature_dict[key + "_accel_mean"] = acc[:, i].mean()
        feature_dict[key + "_accel_var"] = acc[:, i].var()
        feature_dict[key + "_accel_skewness"] = st.skew(acc[:, i])
        feature_dict[key + "_accel_kurtosis"] = st.kurtosis(acc[:, i])

        # Need the skewness and kurtosis of all mfcc means
        if i > 0:
            mfcc_means.append(feature_dict[key + "_mean"])

    feature_dict["mfcc_skewness"] = st.skew(mfcc_means)
    feature_dict["mfcc_kurtosis"] = st.kurtosis(mfcc_means)
    return feature_dict
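The velocity/acceleration ("delta") computation above can also be expressed with np.diff; a minimal toy sketch (not from the original repo; np.diff computes frame t+1 minus frame t, hence the leading minus sign):

import numpy as np

mfcc_feat = np.arange(12, dtype=float).reshape(4, 3)  # 4 frames x 3 coefficients
vel = -np.diff(mfcc_feat, axis=0) / 2.0  # equals (mfcc_feat[:-1] - mfcc_feat[1:]) / 2
acc = -np.diff(vel, axis=0) / 2.0
print(vel.shape, acc.shape)  # (3, 3) (2, 3): one row shorter per derivative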
Example #4
def AAcal(seqcont):
    v = []
    for i in range(len(seqcont)):
        vtar = seqcont[i]
        vtarv = []
        vtar7 = 0
        vtar8 = 0
        vtar9 = 0
        s = pd.Series(vtar)
        vtar3 = np.mean(vtar)  # These 4 dimensions are relevant statistical terms
        vtar4 = st.kurtosis(vtar)
        vtar5 = np.var(vtar)
        vtar6 = st.skew(vtar)
        # for p in range(len(vtar)):  # These 3 dimensions are inspired by PAFIG algorithm
        #     vtar7 = vtar[p]**2 + vtar7
        #     if vtar[p] > va:
        #         vtar8 = vtar[p]**2 + vtar8
        #     else:
        #         vtar9 = vtar[p]**2 + vtar9
        vcf1 = []
        vcf2 = []
        for j in range(len(vtar) - 1):  # Sequence-order-correlation terms
            vcf1.append(vtar[j] - vtar[j + 1])
        for k in range(len(vtar) - 2):
            vcf2.append(vtar[k] - vtar[k + 2])
        vtar10 = np.mean(vcf1)
        vtar11 = np.var(vcf1)
        vtar11A = st.kurtosis(vcf1)
        vtar11B = st.skew(vcf1)
        vtar12 = np.mean(vcf2)
        vtar13 = np.var(vcf2)
        vtar13A = st.kurtosis(vcf2)
        vtar13B = st.skew(vcf2)
        vtarv.extend([vtar3, vtar4, vtar5, vtar6])
        # vtarv.extend([vtar7 / len(vtar), vtar8 / len(vtar), vtar9 / len(vtar)])
        vtarv.extend([vtar10, vtar11, vtar11A, vtar11B])
        vtarv.extend([vtar12, vtar13, vtar13A, vtar13B])
        v.append(vtarv)
    return v
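A compact, hedged reimplementation of the same per-sequence statistics (order regularized to mean/var/kurtosis/skew for the raw values and the lag-1/lag-2 difference series); the helper name aa_features is hypothetical:

import numpy as np
import scipy.stats as st

def aa_features(seq):
    seq = np.asarray(seq, dtype=float)
    lag1 = seq[:-1] - seq[1:]  # lag-1 sequence-order-correlation terms
    lag2 = seq[:-2] - seq[2:]  # lag-2 terms
    out = []
    for v in (seq, lag1, lag2):
        out += [np.mean(v), np.var(v), st.kurtosis(v), st.skew(v)]
    return out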
Example #5
    def __extract_features(self, mfcc_data: dict) -> dict:
        """
        Extracts the features from the MFCC data
        :param mfcc_data: MFCC data for an audio chunk
        :return: the extracted features from the input MFCC data
        """
        features, mfcc_means = {}, []

        for i in range(0, 14):
            key = "energy" if i == 0 else "mfcc_" + str(i)

            features.update(
                self.__get_summary_stats(key, mfcc_data["mfcc_features"], i))
            features.update(
                self.__get_summary_stats(key + "_velocity",
                                         mfcc_data["velocity"], i))
            features.update(
                self.__get_summary_stats(key + "_acceleration",
                                         mfcc_data["acceleration"], i))

            if i > 0:
                mfcc_means.append(features[key + "_mean"])

        features["mfcc_skewness"] = st.skew(np.array(mfcc_means))
        features["mfcc_kurtosis"] = st.kurtosis(mfcc_means)

        return features
Example #6
def get_data(column, np_values, alpha):

    mvs = bayes_mvs(np_values, alpha)

    #report these metrics
    output = [
        present("Column", column),
        present("Length", len(np_values)),
        present("Unique", len(np.unique(np_values))),
        present("Min", np_values.min()),
        present("Max", np_values.max()),
        present("Mid-Range", (np_values.max() - np_values.min())/2),
        present("Range", np_values.max() - np_values.min()),
        present("Mean", np_values.mean()),
        present("Mean-%s-CI" % alpha, tupleToString(mvs[0][1])),
        present("Variance", mvs[1][0]),
        present("Var-%s-CI" % alpha, tupleToString(mvs[1][1])),
        present("StdDev", mvs[2][0]),
        present("Std-%s-CI" % alpha, tupleToString(mvs[2][1])),
        present("Mode", stats.mode(np_values)[0][0]),
        present("Q1", stats.scoreatpercentile(np_values, 25)),
        present("Q2", stats.scoreatpercentile(np_values, 50)),
        present("Q3", stats.scoreatpercentile(np_values, 75)),
        present("Trimean", trimean(np_values)),
        present("Minhinge", midhinge(np_values)),
        present("Skewness", stats.skew(np_values)),
        present("Kurtosis", stats.kurtosis(np_values)),
        present("StdErr", sem(np_values)),
        present("Normal-P-value", normaltest(np_values)[1])
        ]
    return output
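A sketch of how the mvs indexing above maps onto scipy.stats.bayes_mvs, which returns (mean, variance, std) estimates, each pairing a point statistic with a (lower, upper) credible interval:

import numpy as np
from scipy.stats import bayes_mvs

values = np.random.default_rng(0).normal(10, 2, size=500)
mean_est, var_est, std_est = bayes_mvs(values, alpha=0.9)
print(mean_est.statistic, mean_est.minmax)  # mvs[0][0] and mvs[0][1] above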
Example #8
 def _extract_one(self, sourcepc, neighborhood):
     if neighborhood:
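         # `point` is assumed to be a dataset key constant imported elsewhere
         # (e.g. laserchicken.keys.point); it is not defined in this snippet.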
         z = sourcepc[point][self.data_key]['data'][neighborhood]
         kurtosis_z = stat.kurtosis(z)
     else:
         kurtosis_z = np.NaN
     return kurtosis_z
Example #9
 def _get_reward(self, real_values: dict, i: int):
     """
     Get the reward returned after previous action
     """
     df = pd.read_csv('output.csv', skiprows=[0], sep=';')
     last_return = df['price'].values[-1] / self.init_price - 1
     reward = {'return': last_return}
     if i < 100:  # + 1
         return reward
     #
     # np.append concatenates here; the original `values + [last_return]`
     # would broadcast-add the scalar to every element instead of appending.
     returns = np.append(self.sim_df.tail(99)['return'].dropna().values,
                         last_return)
     mu, sigma = norm.fit(returns)
     skew, kurtosis = st.skew(returns), st.kurtosis(returns)
     # autocorr = f_autocorr(np.abs(returns))[0, 1]
     reward.update({
         'mu': mu,
         'sigma': sigma,
         'skew': skew,
         'kurtosis': kurtosis,
         # 'autocorr': autocorr,
     })
     # error = {
     #     k: np.abs((reward[k] - real_values[k])**2 / real_values[k])
     #     for k, v in reward.items() if k != 'return'
     # }
     sub_df = self.df.iloc[i - 100:i]
     error = {
         k: ((reward[k] - sub_df[k].mean()) / sub_df[k].std())**2
         for k, v in reward.items() if k != 'return'
     }
     reward['error'] = -sum(error.values())
     os.remove('output.csv')
     return reward
Example #10
def get_stats_numpy(data, zero):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25, 50, 75, 90]
    percentiles = np.array(np.percentile(data, pc))
    silences = np.count_nonzero(np.asarray(data) == zero)
    # Run lengths of consecutive `zero` samples; guard against an empty list
    # and compare against `zero` (not the literal 0) consistently.
    silence_runs = [sum(1 for _ in g) for k, g in groupby(data) if k == zero]
    silence_mean = np.mean(silence_runs) if silences > 0 else 0
    longest_silence = max(silence_runs) if silences > 0 else 0
    shortest_silence = min(silence_runs) if silences > 0 else 0

    # print("Mean: " + str(mean))
    # print("Media: " + str(median))
    # print("StdDev: " + str(std))
    # print("Variance: " + str(var))
    # print("Skewness: " + str(skew))
    # print("Kurtosis: " + str(kurt))
    # print("Pc25: " + str(percentiles[0]))
    # print("Pc50: " + str(percentiles[1]))
    # print("Pc75: " + str(percentiles[2]))

    features = np.hstack(
        (mean, median, std, var, skew, kurt, percentiles, silences,
         silence_mean, longest_silence, shortest_silence))

    return features
Example #11
def computeFeatureVector(data):
    print("computeFeatureVector")
    stDeviation = np.std(np.array(data))      # standard deviation
    coVariant = st.variation(np.array(data))  # coefficient of variation
    kurtosis = st.kurtosis(np.array(data))    # kurtosis
    features = ft.cFeatures(stDeviation, coVariant, kurtosis)
    return features
Example #12
def get_mean_var_skew_kurt(np_array):
    return {
        "mean": np_array.mean(),
        "var": np_array.var(),
        "skewness": st.skew(np_array),
        "kurtosis": st.kurtosis(np_array),
    }
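A quick usage sketch with toy data, assuming np is numpy and st is scipy.stats as in the surrounding snippets:

import numpy as np
import scipy.stats as st

arr = np.random.default_rng(1).exponential(size=10_000)
print(get_mean_var_skew_kurt(arr))  # skewness ~2 and excess kurtosis ~6 for an exponential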
Example #13
def get_stats_json(data):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25,50,75]
    percentiles = np.array(np.percentile(data, pc))
    silences = np.count_nonzero(np.asarray(data)==0.0)
    longest_silence = max(sum(1 for _ in g) for k, g in groupby(data) if k==0) if silences > 0 else 0
    shortest_silence = min(sum(1 for _ in g) for k, g in groupby(data) if k==0) if silences > 0 else 0
    #print("Mean: " + str(mean))
    #print("Media: " + str(median))
    #print("StdDev: " + str(std))
    #print("Variance: " + str(var))
    #print("Skewness: " + str(skew))
    #print("Kurtosis: " + str(kurt))
    #print("Pc25: " + str(percentiles[0]))
    #print("Pc50: " + str(percentiles[1]))
    #print("Pc75: " + str(percentiles[2]))
    
    statistics = {
        'mean': mean,
        'median': median,
        'std': std,
        'var': var,
        'skew': skew,
        'kurt': kurt,
        'pc25': percentiles[0],
        'pc50': percentiles[1],
        'pc75': percentiles[2],
    }
    
    return statistics
Example #14
def get_launch_feature(row):
    feature = pd.Series()
    feature['user_id'] = list(row['user_id'])[0]
    # feature['launch_count'] = len(row)
    diff_day = np.diff(row['day'])
    if len(diff_day) != 0:
        feature['launch_day_diff_mean'] = np.mean(diff_day)
        feature['launch_day_diff_std'] = np.std(diff_day)
        feature['launch_day_diff_max'] = np.max(diff_day)
        feature['launch_day_diff_min'] = np.min(diff_day)
        feature['launch_day_diff_kur'] = stats.kurtosis(diff_day)
        feature['launch_day_diff_ske'] = stats.skew(diff_day)
        feature['launch_day_diff_last'] = diff_day[-1]
        # feature['launch_day_cut_max_day'] = day_cut_max_day(row['day'])
        feature['launch_sub_register'] = np.subtract(np.max(row['max_day']),
                                                     np.max(row['day']))
    else:
        feature['launch_day_diff_mean'] = 0
        feature['launch_day_diff_std'] = 0
        feature['launch_day_diff_max'] = 0
        feature['launch_day_diff_min'] = 0
        feature['launch_day_diff_kur'] = 0
        feature['launch_day_diff_ske'] = 0
        feature['launch_day_diff_last'] = 0
        # feature['launch_day_cut_max_day'] = day_cut_max_day(row['day'])
        feature['launch_sub_register'] = np.subtract(np.max(row['max_day']),
                                                     np.max(row['day']))

    launch_day_count = np.bincount(row['day'])[np.nonzero(
        np.bincount(row['day']))[0]]
    feature['launch_day_count_mean'] = np.mean(launch_day_count)
    feature['launch_day_count_max'] = np.max(launch_day_count)
    feature['launch_day_count_std'] = np.std(launch_day_count)
    return feature
Example #15
def base_stats(data_1):
    stats_dict = np.zeros((data_1.shape[0], 4))
    for i in range(data_1.shape[0]):
        stats_dict[i, 0] = st.skew(data_1[i], bias=False)
        stats_dict[i, 1] = st.kurtosis(data_1[i], bias=False)
        stats_dict[i, 2] = np.max(data_1[i])
        stats_dict[i, 3] = np.std(data_1[i])
    return stats_dict
Example #16
 def __get_summary_stats(key: str, data: np.ndarray,
                         coefficient: int) -> dict:
     return {
         key + "_mean": data[:, coefficient].mean(),
         key + "_variance": data[:, coefficient].var(),
         key + "_skewness": st.skew(data[:, coefficient]),
         key + "_kurtosis": st.kurtosis(data[:, coefficient])
     }
Example #17
 def extract(self, sourcepc, neighborhood, targetpc, targetindex,
             volume_description):
     if neighborhood:
         z = sourcepc[point][self.data_key]['data'][neighborhood]
         kurtosis_z = stat.kurtosis(z)
     else:
         kurtosis_z = np.NaN
     return kurtosis_z
Example #18
def kurtosis_normal_distribution(df, features, crypto_name, output_path):
    res = {'feature': [], 'kurtosis_of_n_distrib': []}
    for feature in features:
        df = df.dropna(subset=[feature])
        # stats.kurtosis returns a single statistic (no p-value, unlike kurtosistest).
        stat = stats.kurtosis(df[feature].values)
        res['feature'].append(feature)
        res['kurtosis_of_n_distrib'].append(stat)
    pd.DataFrame(data=res).to_csv(output_path + crypto_name + ".csv",
                                  sep=",",
                                  index=False)
Example #19
def get_mfcc_features(filename):
    feature_dict = {}
    (rate, sig) = wav.read(filename)

    if sig.ndim == 2:
        # wav is stereo so average over both channels 
        mfcc_feat_chan0 = mfcc(sig[:,0], rate, numcep=15, appendEnergy=True)
        mfcc_feat_chan1 = mfcc(sig[:,1], rate, numcep=15, appendEnergy=True)
        mfcc_feat = (mfcc_feat_chan0 + mfcc_feat_chan1) / 2
    else:
        mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    # Velocity: half the difference between consecutive frames (t minus t+1).
    vel = (mfcc_feat[:-1,:] - mfcc_feat[1:,:]) / 2.0
    # Acceleration: half the difference between consecutive velocity frames.
    acc = (vel[:-1,:] - vel[1:,:]) / 2.0
    mfcc_means = []
    for i in range(0, 14):
        key = "energy" if i == 0 else "mfcc" + str(i)
        # mfcc
        feature_dict[key + "_mean"]     = mfcc_feat[:, i].mean()
        feature_dict[key + "_var"]      = mfcc_feat[:, i].var()
        feature_dict[key + "_skewness"] = st.skew(mfcc_feat[:, i])
        feature_dict[key + "_kurtosis"] = st.kurtosis(mfcc_feat[:, i])
        # Vel
        feature_dict[key + "_vel_mean"]     = vel[:, i].mean()
        feature_dict[key + "_vel_var"]      = vel[:, i].var()
        feature_dict[key + "_vel_skewness"] = st.skew(vel[:, i])
        feature_dict[key + "_vel_kurtosis"] = st.kurtosis(vel[:, i])
        # Accel
        feature_dict[key + "_accel_mean"]     = acc[:, i].mean()
        feature_dict[key + "_accel_var"]      = acc[:, i].var()
        feature_dict[key + "_accel_skewness"] = st.skew(acc[:, i])
        feature_dict[key + "_accel_kurtosis"] = st.kurtosis(acc[:, i])

        # Need the skewness and kurtosis of all mfcc means 
        if i > 0:
            mfcc_means.append(feature_dict[key + "_mean"])
    
    feature_dict["mfcc_skewness"] = st.skew(mfcc_means)
    feature_dict["mfcc_kurtostis"] = st.kurtosis(mfcc_means)
    return feature_dict
Example #20
def get_feature(region_props, n_region, feature_name):
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = format_2f(np.max(feature_values))
        feature[MEAN] = format_2f(np.mean(feature_values))
        feature[VARIANCE] = format_2f(np.var(feature_values))
        feature[SKEWNESS] = format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = format_2f(st.kurtosis(np.array(feature_values)))

    return feature
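The MAX/MEAN/VARIANCE/SKEWNESS/KURTOSIS index names are not defined in the snippet; a minimal assumption that makes it runnable:

# Hypothetical index constants for the 5-element feature list above.
MAX, MEAN, VARIANCE, SKEWNESS, KURTOSIS = range(5)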
Example #21
def get_feature(region_props, n_region, feature_name):
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = utils.format_2f(np.max(feature_values))
        feature[MEAN] = utils.format_2f(np.mean(feature_values))
        feature[VARIANCE] = utils.format_2f(np.var(feature_values))
        feature[SKEWNESS] = utils.format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = utils.format_2f(st.kurtosis(np.array(feature_values)))

    return feature
Example #22
 def aggregate_ftr_matrix(self, ftr_matrix):
     sig = []
     for ftr in ftr_matrix:
         # scipy.stats.nanmedian/nanmean/nanstd were removed; numpy equivalents:
         median = np.nanmedian(ftr)
         mean = np.nanmean(ftr)
         std = np.nanstd(ftr, ddof=1)  # stats.nanstd defaulted to the unbiased estimate
         # Invalid double scalars warning appears here
         skew = stats.skew(ftr) if any(ftr) else 0.0
         kurtosis = stats.kurtosis(ftr)
         sig.extend([median, mean, std, skew, kurtosis])
     return sig
Example #24
def extract_features_for_pqrst(row, pqrsts):
    features = []

    p = [x[0] for x in pqrsts]
    q = [x[1] for x in pqrsts]
    r = [x[2] for x in pqrsts]
    s = [x[3] for x in pqrsts]
    t = [x[4] for x in pqrsts]

    pqrsts = pqrsts[:min(NB_RR, len(pqrsts))]
    row = low_pass_filtering(row)
    row = high_pass_filtering(row)
    for i in range(len(pqrsts)):
        pq = row[p[i]:q[i]]
        st = row[s[i]:t[i]]
        pt = row[p[i]:t[i]]
        pmax = np.amax(pq)
        pmin = np.amin(pq)  # was np.amax, a copy-paste slip
        tmax = np.amax(st)
        tmin = np.amin(st)  # was np.amax

        p_mean = np.mean(pq)
        t_mean = np.mean(st)

        features += [
            # features for PQ interval
            pmax,
            pmax / row[r[i]],
            pmin / pmax,
            p_mean,
            p_mean / pmax,
            np.std(pq),
            common.mode(pq),

            # features for ST interval
            tmax,
            tmax / row[r[i]],
            tmin / tmax,
            t_mean,
            t_mean / tmax,
            np.std(st),
            common.mode(st),
            p_mean / t_mean,

            # features for whole PQRST interval
            stats.skew(pt),
            stats.kurtosis(pt)
        ]

    for i in range(NB_RR - len(pqrsts)):
        features += [0 for x in range(17)]

    return features
Example #25
def get_feature(region_props, n_region, feature_name):
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature = feature_tuple(
            MAX=format_2f(np.max(feature_values)),
            MEAN=format_2f(np.mean(feature_values)),
            VARIANCE=format_2f(np.var(feature_values)),
            SKEWNESS=format_2f(st.skew(np.array(feature_values))),
            KURTOSIS=format_2f(st.kurtosis(np.array(feature_values))))
    else:
        feature = feature_tuple(*([0] * 5))
    return feature
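feature_tuple is not defined in the snippet; given the keyword construction and the *([0] * 5) fallback, a namedtuple is a reasonable assumption:

from collections import namedtuple

# Hypothetical definition matching how feature_tuple is used above.
feature_tuple = namedtuple(
    "feature_tuple", ["MAX", "MEAN", "VARIANCE", "SKEWNESS", "KURTOSIS"])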
Example #26
def getFourMoments(sequence, ax=1):
    finalArray = [
        np.mean(sequence, axis=ax),
        np.var(sequence, axis=ax),
        skew(sequence, axis=ax),
        kurtosis(sequence, axis=ax),
        sem(sequence, axis=ax),
    ]
    if ax is not None:
        finalArray = np.array(finalArray)
        finalArray = finalArray.T
        return np.concatenate((finalArray, np.array(mquantiles(sequence, axis=ax))), axis=ax)
    finalArray.extend(mquantiles(sequence, axis=ax))
    return np.array(finalArray)
Example #27
 def _calculateStatistics(self, img, haralick=False, zernike=False):
     result = []
     # 3-bin histogram
     result.extend(mquantiles(img))
     # First four moments
     result.extend([img.mean(), img.var(), skew(img, axis=None), kurtosis(img, axis=None)])
     # Haralick features
     if haralick:
         integerImage = dtype.img_as_ubyte(img)
         result.extend(texture.haralick(integerImage).flatten())
     # Zernike moments
     if zernike:
         result.extend(zernike_moments(img, int(self.rows) / 2 + 1))
     return result
Example #28
def get_feature(region_props, n_region, feature_name):
    """
    Returns: 
        feature:list of [max, mean, variance, skewness, kurtosis]
    """
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = utils.format_2f(np.max(feature_values))
        feature[MEAN] = utils.format_2f(np.mean(feature_values))
        feature[VARIANCE] = utils.format_2f(np.var(feature_values))
        feature[SKEWNESS] = utils.format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = utils.format_2f(st.kurtosis(np.array(feature_values)))

    return feature
Example #29
def getFourMoments(sequence, ax=1):
    finalArray = [
        np.mean(sequence, axis=ax),
        np.var(sequence, axis=ax),
        skew(sequence, axis=ax),
        kurtosis(sequence, axis=ax),
        sem(sequence, axis=ax)
    ]
    if ax is not None:
        finalArray = np.array(finalArray)
        finalArray = finalArray.T
        return np.concatenate(
            (finalArray, np.array(mquantiles(sequence, axis=ax))), axis=ax)
    finalArray.extend(mquantiles(sequence, axis=ax))
    return np.array(finalArray)
Example #30
def generate_moment(dataset, NO_OF_PROPERTIES, NO_MOMENTS):
    element_count = len(dataset)
    moments = np.zeros((element_count, NO_OF_PROPERTIES, NO_MOMENTS))
    # TODO debugging here only
    for row in range(element_count):
        moments[row, :, :] = np.array([
            # scipy.mean/scipy.std were removed from SciPy; use numpy directly.
            np.mean(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            np.std(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            stats.skew(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            stats.kurtosis(dataset[row][0:NO_OF_PROPERTIES, :], axis=1)
        ]).transpose()
    return moments
Example #31
def compute_features(dataframe, columns, bins, model, model_type="KMeans"):
    """
    Compute the features of the specified columns from a Pandas dataframe using the given model.

    :param dataframe: Pandas dataframe.
    :param columns: List of the columns name.
    :param bins: Number of bins.
    :param model: Model.
    :param model_type: Type of the model.
    :return: Features.
    """
    import numpy as np
    import scipy.stats.stats as st
    row = []
    for j, column in enumerate(columns):
        column_df = dataframe[column]
        X = column_df.values

        if model is not None:
            if model_type == "KMeans":
                r = model[column].predict(X.reshape(-1, 1))

            if model_type == "PolynomialFeatures":
                r = model[column].transform(X.reshape(-1, 1)).tolist()
        else:
            r = X

        # compute feature histogram
        # counts, bin_edges = np.histogram(result, bins=bins[j], density=False)
        # column_hist = counts

        # compute normalized feature histogram
        counts, bin_edges = np.histogram(r, bins=bins[j], density=True)
        column_hist = counts * np.diff(bin_edges)

        row.extend(column_hist)

        # add extra features
        kurtosis = st.kurtosis(X.reshape(-1, 1))[0]
        skew = st.skew(X.reshape(-1, 1))[0]
        min_value = column_df.min()
        max_value = column_df.max()
        mean_value = column_df.mean()
        median_value = column_df.median()
        row.extend(
            [kurtosis, skew, min_value, max_value, mean_value, median_value])
    return row
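Why counts * np.diff(bin_edges) above: with density=True, np.histogram returns a probability density, so multiplying by each bin's width recovers per-bin probability mass summing to ~1. A small check:

import numpy as np

x = np.random.default_rng(2).normal(size=1000)
density, edges = np.histogram(x, bins=10, density=True)
print((density * np.diff(edges)).sum())  # ~1.0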
Example #32
def extract_features(data, y, window_len, task2=False):  #num_windows):
    i = 0
    #window_len = len(data)/(num_windows/2)
    if task2:
        num_windows = len(data) - window_len + 1
    else:
        num_windows = len(data) // (window_len // 2)
    #print 'num_windows = 208, window_len = ' , str(len(data)/(208/2))
    #print 'now num_windows = '+ str(num_windows)+', window_len = '+str(window_len)
    features = []
    targets = []
    for n in range(num_windows):
        win = data[i:i + window_len]
        if task2:
            target = y.iloc[i]
        else:
            try:
                target = int(y[i:i + window_len].mode())
            except Exception:
                target = int(y[i:i + window_len])
        targets.append(target)
        for c in data.columns:
            s = np.array(win[c])
            rms_val = rms(s)
            (min_max, peak, peaknum) = min_max_mean(s)
            mean = s.mean()
            std = s.std()
            skew = st.skew(s)
            kurtosis = st.kurtosis(s)
            coefficients = std / mean
            logpower = np.log10((s**2)).sum()
            new_features = [
                rms_val, min_max, mean, std, skew, kurtosis, peak, peaknum,
                coefficients, logpower
            ]
            #new_features = [rms_val, min_max, mean, std]
            features.append(new_features)
        if (task2):
            i += 1
        else:
            i += window_len // 2
    features = np.array(features)
    features.shape = num_windows, 120  #48#72
    targets = np.array(targets)
    return features, targets
Example #33
def get_create_feature(row):
    feature = pd.Series()
    feature['user_id'] = list(row['user_id'])[0]
    # feature['create_count'] = len(row)
    diff_day = np.diff(row['day'])
    if len(diff_day) != 0:
        # feature['create_day_diff_mean'] = np.mean(diff_day)
        # feature['create_day_diff_std'] = np.std(diff_day)
        # feature['create_day_diff_min'] = np.min(diff_day)
        # feature['create_day_diff_mode'] = stats.mode(interval_data)[0][0]
        feature['create_day_diff_ske'] = stats.skew(diff_day)
        feature['create_day_diff_kur'] = stats.kurtosis(diff_day)
        # feature['create_day_diff_max'] = np.max(diff_day)
        feature['create_day_last'] = diff_day[-1]
        feature['create_sub_register'] = np.subtract(np.max(row['max_day']),
                                                     np.max(row['day']))
        feature['create_mode'] = stats.mode(row['day'])[0][0]
        return feature
Example #34
 def __init__(self):
     """
     Init the environment, this is a `_reset` function however we don't need
     the `reset` function, so I put the code here
     """
     input_path = 'data/vnindex.csv'
     df = pd.read_csv(input_path)
     df['return'] = df['close'].pct_change()
     for index, row in df.iterrows():
         if index < 100: continue
         data = df.iloc[index - 100:index]['return']
         mu, sigma = norm.fit(data)
         skew, kurtosis = st.skew(data), st.kurtosis(data)
         autocorr = f_autocorr(data.abs())[0, 1]
         df.loc[index, 'mu'] = mu
         df.loc[index, 'sigma'] = sigma
         df.loc[index, 'skew'] = skew
         df.loc[index, 'kurtosis'] = kurtosis
         df.loc[index, 'autocorr'] = autocorr
     #
     df.to_csv(input_path, index=False)
     self.df = df
     # self.df = pd.read_csv(input_path)
     self.sim_df = pd.DataFrame()
     # init parameters for fms
     self.total_number = 10000
     self.init_price = 100000
     # The Space object corresponding to valid observations
     self.obs_mu = [-0.0102, -0.0011, 0.0001, 0.0016, 0.0140]
     self.obs_sigma = [0.0028, 0.0084, 0.0120, 0.0159, 0.0492]
     self.obs_skew = [-2.0660, -0.2824, 0.0388, 0.3409, 2.6633]
     self.obs_kurtosis = [-1.47, -0.24, 0.34, 1.40, 16.19]
     self.observation_space = None
     self.observation_space_n = \
         len(self.obs_mu) * \
         len(self.obs_sigma) * \
         len(self.obs_skew) * \
         len(self.obs_kurtosis)
     # A tuple corresponding to the min and max possible rewards
     self.reward_range = (-np.inf, 0)
     #
     self.zero_pct = 0.3
     self.herding_pct = 0.3
Example #35
 def _calculateStatistics(self, img, haralick=False, zernike=False):
     result = []
     #3-bin histogram
     result.extend(mquantiles(img))
     #First four moments
     result.extend([
         img.mean(),
         img.var(),
         skew(img, axis=None),
         kurtosis(img, axis=None)
     ])
     #Haralick features
     if haralick:
         integerImage = dtype.img_as_ubyte(img)
         result.extend(texture.haralick(integerImage).flatten())
     #Zernike moments
     if zernike:
         result.extend(zernike_moments(img, int(self.rows) / 2 + 1))
     return result
Example #36
def compute_features(sub_df, columns, bins, model, model_type="KMeans"):
    import scipy.stats.stats as st
    row = []
    for j, column in enumerate(columns):
        column_df = sub_df[column]
        X = column_df.values

        if model is not None:
            if model_type == "KMeans":
                result = model[column].predict(X.reshape(-1, 1))

            if model_type == "PolynomialFeatures":
                result = model[column].transform(X.reshape(-1, 1)).tolist()
        else:
            result = X

        # compute feature histogram
        #counts, bin_edges = np.histogram(result, bins=bins[j], density=False)
        #column_hist = counts

        # compute normalized feature histogram
        counts, bin_edges = np.histogram(result, bins=bins[j], density=True)
        column_hist = counts * np.diff(bin_edges)

        row.extend(column_hist)

        # add extra features
        kurtosis = st.kurtosis(X.reshape(-1, 1))[0]
        skew = st.skew(X.reshape(-1, 1))[0]
        min_value = column_df.min()
        max_value = column_df.max()
        mean_value = column_df.mean()
        median_value = column_df.median()
        row.extend(
            [kurtosis, skew, min_value, max_value, mean_value, median_value])
    return row
Example #37
def wavelet_transform(data_1, type_w, level_w):
    coeff_mean = np.zeros((data_1.shape[0], level_w + 1))
    coeff_std = np.zeros((data_1.shape[0], level_w + 1))
    coeff_skew = np.zeros((data_1.shape[0], level_w + 1))
    coeff_kurt = np.zeros((data_1.shape[0], level_w + 1))
    for i in range(data_1.shape[0]):
        """ Wavelet decomposition """
        w = pywt.wavedec(data_1[i], wavelet=type_w, level=level_w)
        for j in range(0, len(w)):
            coeff_mean[i, j] = np.mean(w[j])
            coeff_std[i, j] = np.std(w[j])
            coeff_skew[i, j] = st.skew(w[j], bias=False)
            coeff_kurt[i, j] = st.kurtosis(w[j], bias=False)
    """ Factor analysis on the wavelet coefficients
        Taking the first component"""
    fa_mean_coeff = FactorAnalysis(
        n_components=1).fit(coeff_mean).transform(coeff_mean)
    fa_std_coeff = FactorAnalysis(
        n_components=1).fit(coeff_std).transform(coeff_std)
    fa_skew_coeff = FactorAnalysis(
        n_components=1).fit(coeff_skew).transform(coeff_skew)
    fa_kurt_coeff = FactorAnalysis(
        n_components=1).fit(coeff_kurt).transform(coeff_kurt)
    return fa_mean_coeff, fa_std_coeff, fa_skew_coeff, fa_kurt_coeff
Example #38
def noiseMeter(data=None):
    # get signal statistics to assess noise

    cols = ['Max', 'Std', 'Max/Std', 'Kurt', 'Skew']
    table = pandas.DataFrame(index=range(data.shape[1]),
                             columns=cols,
                             dtype='float64')

    # maximum amplitude
    table['Max'] = np.abs(data).max(axis=0)  #/sigma2

    # standard deviation
    table['Std'] = data.std(ddof=1, axis=0)

    # max/std
    table['Max/Std'] = table['Max'] / table['Std']

    # kurtosis
    table['Kurt'] = stats.kurtosis(data, bias=False, axis=0)

    # skewness
    table['Skew'] = np.abs(stats.skew(data, bias=False, axis=0))

    return table
Example #40
 def contrast(img):
     kurt = kurtosis(img,axis=None,fisher=False)
     var = img.var()
     return var / np.power(kurt, 1. / 4.)
Example #41
def signal_stats(signal=None):
    """Compute various metrics describing the signal.

    Parameters
    ----------
    signal : array
        Input signal.

    Returns
    -------
    mean : float
        Mean of the signal.
    median : float
        Median of the signal.
    max : float
        Maximum signal amplitude.
    var : float
        Signal variance (unbiased).
    std_dev : float
        Standard signal deviation (unbiased).
    abs_dev : float
        Absolute signal deviation.
    kurtosis : float
        Signal kurtosis (unbiased).
    skewness : float
        Signal skewness (unbiased).

    """

    # check inputs
    if signal is None:
        raise TypeError("Please specify an input signal.")

    # ensure numpy
    signal = np.array(signal)

    # mean
    mean = np.mean(signal)

    # median
    median = np.median(signal)

    # maximum amplitude
    maxAmp = np.abs(signal - mean).max()

    # variance
    sigma2 = signal.var(ddof=1)

    # standard deviation
    sigma = signal.std(ddof=1)

    # absolute deviation
    ad = np.sum(np.abs(signal - median))

    # kurtosis
    kurt = stats.kurtosis(signal, bias=False)

    # skewness
    skew = stats.skew(signal, bias=False)

    # output
    args = (mean, median, maxAmp, sigma2, sigma, ad, kurt, skew)
    names = ('mean', 'median', 'max', 'var', 'std_dev', 'abs_dev', 'kurtosis',
             'skewness')

    return utils.ReturnTuple(args, names)
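A usage sketch, assuming ReturnTuple supports dict-style access by the names given above (as biosppy's utils.ReturnTuple does):

import numpy as np

x = np.random.default_rng(3).laplace(size=2000)
res = signal_stats(signal=x)
print(res['kurtosis'], res['skewness'])  # Laplace sample: excess kurtosis ~3, skewness ~0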
Example #42
    args = parser.parse_args()

    segment_boundaries = np.loadtxt(args.segments_filename, usecols=(2, 3))
    segment_lengths = segment_boundaries[:, 1] - segment_boundaries[:, 0]
    count = len(segment_lengths)
    mean = np.mean(segment_lengths)
    median = np.median(segment_lengths)

    print("num segments read: {:d}".format(count))
    print("total time (h): {:.2f}".format(np.sum(segment_lengths) / 3600))
    print("mean (s): {:.2f}".format(mean))
    print("median (s): {:.2f}".format(median))
    print("skew: {:.2f}".format(st.skew(segment_lengths, bias=True)))
    print("skew [corrected]: {:.2f}".format(st.skew(segment_lengths, bias=False)))
    print("skewtest: {}".format(st.skewtest(segment_lengths)))
    print("kurtosis: {:.2f}".format(st.kurtosis(segment_lengths)))

    # Figure out how many segments would fill the desired number of hours,
    # then round to the nearest 10k (round(x, -4)).
    possible_num_hours_segmentations = (100, 300, 500, 1000, 1500, 3000)
    print("=== from mean ===")
    for num_hours in possible_num_hours_segmentations:
        num_segments = int(num_hours * 3600 / mean)
        print("{:d} h: {:d} ({:d}) segments".format(num_hours, round(num_segments, -4), num_segments))
    print("=== from median ===")
    for num_hours in possible_num_hours_segmentations:
        num_segments = int(num_hours * 3600 / median)
        print("{:d} h: {:d} ({:d}) segments".format(num_hours, round(num_segments, -4), num_segments))

    gp.plot((segment_lengths, {"histogram": "freq", "binwidth": 1}))
Example #43
	def evaluate(self, t):
		"""
		Returns the short-term kurtosis at time t.
		"""
		d = self.asset.getPreviousData(t, self.__length)
		return stats.kurtosis(d, fisher=self.__fisher)
Example #44
    if maxInterim > maxValue:
        maxValue = maxInterim
    minInterim = min(my_data[:, x])
    if minInterim < minValue:
        minValue = minInterim
        

binWidth = (maxValue - minValue) / (numBins)
newBins = np.arange(minValue, maxValue, binWidth)

# TODO process array only once for speedup?
for x in range(0, numModels):
    # matplotlib removed `normed`; `density=True` is the equivalent.
    frequency = plt.hist(my_data[:, x], bins=newBins, histtype='step', density=True, label=labels[x])
    b[x, 0] = mean(my_data[:, x])
    b[x, 1] = var(my_data[:, x])
    b[x, 2] = skew(my_data[:, x])
    b[x, 3] = kurtosis(my_data[:, x])
    b[x, 4] = entropy(frequency[0])

plt.title(csvString + " Frequency")
plt.legend()
deg = u'\N{DEGREE SIGN}'

plt.xlabel("Airflow Rate (cfm)")
plt.ylabel("Frequency")

for i in range(0, 5):
    print(b[:, i])

plt.show()