def getdata(data, samplerate=44100):
    # Convert to a float array (a plain Python list has no .shape attribute)
    data = np.asarray(data, dtype=float)
    sound_list["samplerate"] = samplerate
    sound_list["wavedata"] = data
    sound_list["number_of_samples"] = sound_list["wavedata"].shape[0]
    sound_list["song_length"] = int(sound_list["number_of_samples"] / samplerate)

    def summary(values):
        # Seven descriptive statistics of a frame-wise feature trajectory
        return [np.min(values), np.max(values), np.mean(values), np.std(values),
                np.median(values), st.skew(values), st.kurtosis(values)]

    ans = []
    zcr, ts = zero_crossing_rate(data, 1024, sound_list["samplerate"])
    ans += summary(zcr)
    rms, ts = root_mean_square(data, 1024, sound_list["samplerate"])
    ans += summary(rms)
    sc, ts = spectral_centroid(data, 1024, sound_list["samplerate"])
    ans += summary(sc)
    sr, ts = spectral_rolloff(data, 1024, sound_list["samplerate"])
    ans += summary(sr)
    sf, ts = spectral_flux(data, 1024, sound_list["samplerate"])
    ans += summary(sf)
    # Append every MFCC coefficient of every frame
    x.set_input_data(data)
    for frame in x.MFCCs:
        for coefficient in frame:
            ans.append(coefficient)
    return ans
def get_transform_funcs(train, cols):
    transform_funcs = []
    for col in cols:
        vector = [row[col] for row in train]
        # Keep the transform whose skewness sorts lowest
        transforms = [(skew(vector, bias=False), "none"),
                      (skew(log_transform(vector), bias=False), "log"),
                      (skew(sqrt_transform(vector), bias=False), "sqrt")]
        best_transform = sorted(transforms)[0][1]
        transform_funcs.append(best_transform)
    return transform_funcs
def get_mfcc_features(filename):
    feature_dict = {}
    (rate, sig) = wav.read(filename)
    if sig.ndim == 2:
        # wav is stereo so average over both channels
        try:
            mfcc_feat_chan0 = mfcc(sig[:, 0], rate, numcep=15, appendEnergy=True)
            mfcc_feat_chan1 = mfcc(sig[:, 1], rate, numcep=15, appendEnergy=True)
            mfcc_feat = (mfcc_feat_chan0 + mfcc_feat_chan1) / 2
        except IndexError:
            print('Index error')
            mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)
    else:
        mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    # Velocity is the difference between timestep t+1 and t for each mfcc_feat / 2
    vel = (mfcc_feat[:-1, :] - mfcc_feat[1:, :]) / 2.0
    # Acceleration is the difference between timestep t+1 and t for each velocity / 2
    acc = (vel[:-1, :] - vel[1:, :]) / 2.0

    mfcc_means = []
    for i in range(0, 14):
        key = "energy" if i == 0 else "mfcc" + str(i)
        # mfcc
        feature_dict[key + "_mean"] = mfcc_feat[:, i].mean()
        feature_dict[key + "_var"] = mfcc_feat[:, i].var()
        feature_dict[key + "_skewness"] = st.skew(mfcc_feat[:, i])
        feature_dict[key + "_kurtosis"] = st.kurtosis(mfcc_feat[:, i])
        # Vel
        feature_dict[key + "_vel_mean"] = vel[:, i].mean()
        feature_dict[key + "_vel_var"] = vel[:, i].var()
        feature_dict[key + "_vel_skewness"] = st.skew(vel[:, i])
        feature_dict[key + "_vel_kurtosis"] = st.kurtosis(vel[:, i])
        # Accel
        feature_dict[key + "_accel_mean"] = acc[:, i].mean()
        feature_dict[key + "_accel_var"] = acc[:, i].var()
        feature_dict[key + "_accel_skewness"] = st.skew(acc[:, i])
        feature_dict[key + "_accel_kurtosis"] = st.kurtosis(acc[:, i])
        # Need the skewness and kurtosis of all mfcc means
        if i > 0:
            mfcc_means.append(feature_dict[key + "_mean"])
    feature_dict["mfcc_skewness"] = st.skew(mfcc_means)
    feature_dict["mfcc_kurtosis"] = st.kurtosis(mfcc_means)
    return feature_dict
def AAcal(seqcont):
    v = []
    for i in range(len(seqcont)):
        vtar = seqcont[i]
        vtarv = []
        vtar7 = 0
        vtar8 = 0
        vtar9 = 0
        s = pd.Series(vtar)
        # These 4 dimensions are relevant statistical terms
        vtar3 = np.mean(vtar)
        vtar4 = st.kurtosis(vtar)
        vtar5 = np.var(vtar)
        vtar6 = st.skew(vtar)
        # These 3 dimensions are inspired by the PAFIG algorithm
        # for p in range(len(vtar)):
        #     vtar7 = vtar[p] ** 2 + vtar7
        #     if vtar[p] > va:
        #         vtar8 = vtar[p] ** 2 + vtar8
        #     else:
        #         vtar9 = vtar[p] ** 2 + vtar9
        # Sequence-order-correlation terms
        vcf1 = []
        vcf2 = []
        for j in range(len(vtar) - 1):
            vcf1.append(vtar[j] - vtar[j + 1])
        for k in range(len(vtar) - 2):
            vcf2.append(vtar[k] - vtar[k + 2])
        vtar10 = np.mean(vcf1)
        vtar11 = np.var(vcf1)
        vtar11A = st.kurtosis(vcf1)
        vtar11B = st.skew(vcf1)
        vtar12 = np.mean(vcf2)
        vtar13 = np.var(vcf2)
        vtar13A = st.kurtosis(vcf2)
        vtar13B = st.skew(vcf2)
        vtarv.append(vtar3)
        vtarv.append(vtar4)
        vtarv.append(vtar5)
        vtarv.append(vtar6)
        # vtarv.append(vtar7 / len(vtar))
        # vtarv.append(vtar8 / len(vtar))
        # vtarv.append(vtar9 / len(vtar))
        vtarv.append(vtar10)
        vtarv.append(vtar11)
        vtarv.append(vtar11A)
        vtarv.append(vtar11B)
        vtarv.append(vtar12)
        vtarv.append(vtar13)
        vtarv.append(vtar13A)
        vtarv.append(vtar13B)
        v.append(vtarv)
    return v
def __extract_features(self, mfcc_data: dict) -> dict:
    """
    Extracts the features from the MFCC data

    :param mfcc_data: MFCC data for an audio chunk
    :return: the extracted features from the input MFCC data
    """
    features, mfcc_means = {}, []
    for i in range(0, 14):
        key = "energy" if i == 0 else "mfcc_" + str(i)
        features.update(
            self.__get_summary_stats(key, mfcc_data["mfcc_features"], i))
        features.update(
            self.__get_summary_stats(key + "_velocity", mfcc_data["velocity"], i))
        features.update(
            self.__get_summary_stats(key + "_acceleration", mfcc_data["acceleration"], i))
        if i > 0:
            mfcc_means.append(features[key + "_mean"])
    features["mfcc_skewness"] = st.skew(np.array(mfcc_means))
    features["mfcc_kurtosis"] = st.kurtosis(mfcc_means)
    return features
def get_data(column, np_values, alpha):
    mvs = bayes_mvs(np_values, alpha)
    # report these metrics
    output = [
        present("Column", column),
        present("Length", len(np_values)),
        present("Unique", len(np.unique(np_values))),
        present("Min", np_values.min()),
        present("Max", np_values.max()),
        present("Mid-Range", (np_values.max() - np_values.min()) / 2),
        present("Range", np_values.max() - np_values.min()),
        present("Mean", np_values.mean()),
        present("Mean-%s-CI" % alpha, tupleToString(mvs[0][1])),
        present("Variance", mvs[1][0]),
        present("Var-%s-CI" % alpha, tupleToString(mvs[1][1])),
        present("StdDev", mvs[2][0]),
        present("Std-%s-CI" % alpha, tupleToString(mvs[2][1])),
        present("Mode", stats.mode(np_values)[0][0]),
        present("Q1", stats.scoreatpercentile(np_values, 25)),
        present("Q2", stats.scoreatpercentile(np_values, 50)),
        present("Q3", stats.scoreatpercentile(np_values, 75)),
        present("Trimean", trimean(np_values)),
        present("Midhinge", midhinge(np_values)),
        present("Skewness", stats.skew(np_values)),
        present("Kurtosis", stats.kurtosis(np_values)),
        present("StdErr", sem(np_values)),
        present("Normal-P-value", normaltest(np_values)[1])
    ]
    return output
def get_stats_numpy(data, zero):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25, 50, 75, 90]
    percentiles = np.array(np.percentile(data, pc))
    # Run-length statistics of the silence value
    silences = np.count_nonzero(np.asarray(data) == zero)
    silence_mean = np.mean(
        list(sum(1 for _ in g) for k, g in groupby(data) if k == zero))
    longest_silence = max(
        sum(1 for _ in g) for k, g in groupby(data) if k == zero) if silences > 0 else 0
    shortest_silence = min(
        sum(1 for _ in g) for k, g in groupby(data) if k == zero) if silences > 0 else 0
    # print("Mean: " + str(mean))
    # print("Media: " + str(median))
    # print("StdDev: " + str(std))
    # print("Variance: " + str(var))
    # print("Skewness: " + str(skew))
    # print("Kurtosis: " + str(kurt))
    # print("Pc25: " + str(percentiles[0]))
    # print("Pc50: " + str(percentiles[1]))
    # print("Pc75: " + str(percentiles[2]))
    features = np.hstack(
        (mean, median, std, var, skew, kurt, percentiles, silences,
         silence_mean, longest_silence, shortest_silence))
    return features
def get_stats_json(data):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25, 50, 75]
    percentiles = np.array(np.percentile(data, pc))
    silences = np.count_nonzero(np.asarray(data) == 0.0)
    longest_silence = max(
        sum(1 for _ in g) for k, g in groupby(data) if k == 0) if silences > 0 else 0
    shortest_silence = min(
        sum(1 for _ in g) for k, g in groupby(data) if k == 0) if silences > 0 else 0
    # print("Mean: " + str(mean))
    # print("Media: " + str(median))
    # print("StdDev: " + str(std))
    # print("Variance: " + str(var))
    # print("Skewness: " + str(skew))
    # print("Kurtosis: " + str(kurt))
    # print("Pc25: " + str(percentiles[0]))
    # print("Pc50: " + str(percentiles[1]))
    # print("Pc75: " + str(percentiles[2]))
    statistics = {
        'mean': mean,
        'median': median,
        'std': std,
        'var': var,
        'skew': skew,
        'kurt': kurt,
        'pc25': percentiles[0],
        'pc50': percentiles[1],
        'pc75': percentiles[2],
    }
    return statistics
def get_launch_feature(row):
    feature = pd.Series()
    feature['user_id'] = list(row['user_id'])[0]
    # feature['launch_count'] = len(row)
    diff_day = np.diff(row['day'])
    if len(diff_day) != 0:
        feature['launch_day_diff_mean'] = np.mean(diff_day)
        feature['launch_day_diff_std'] = np.std(diff_day)
        feature['launch_day_diff_max'] = np.max(diff_day)
        feature['launch_day_diff_min'] = np.min(diff_day)
        feature['launch_day_diff_kur'] = stats.kurtosis(diff_day)
        feature['launch_day_diff_ske'] = stats.skew(diff_day)
        feature['launch_day_diff_last'] = diff_day[-1]
        # feature['launch_day_cut_max_day'] = day_cut_max_day(row['day'])
        feature['launch_sub_register'] = np.subtract(np.max(row['max_day']), np.max(row['day']))
    else:
        feature['launch_day_diff_mean'] = 0
        feature['launch_day_diff_std'] = 0
        feature['launch_day_diff_max'] = 0
        feature['launch_day_diff_min'] = 0
        feature['launch_day_diff_kur'] = 0
        feature['launch_day_diff_ske'] = 0
        feature['launch_day_diff_last'] = 0
        # feature['launch_day_cut_max_day'] = day_cut_max_day(row['day'])
        feature['launch_sub_register'] = np.subtract(np.max(row['max_day']), np.max(row['day']))
    # Per-day launch counts (only for days that actually appear)
    launch_day_count = np.bincount(row['day'])[np.nonzero(np.bincount(row['day']))[0]]
    feature['launch_day_count_mean'] = np.mean(launch_day_count)
    feature['launch_day_count_max'] = np.max(launch_day_count)
    feature['launch_day_count_std'] = np.std(launch_day_count)
    return feature
def ICAFilter(signal=None):
    # EEG filtering based on Independent Component Analysis

    # ICA decomposition
    ica = FastICA(whiten=True)
    IC = ica.fit(signal).transform(signal)
    # mixing matrix (FastICA.get_mixing_matrix() was removed from scikit-learn;
    # the mixing_ attribute holds the same matrix)
    A = ica.mixing_
    # signal = np.dot(IC, A.T)

    # noise metrics
    sigma2 = IC.std(ddof=1, axis=0) ** 2
    f1 = np.abs(IC).max(axis=0) / sigma2
    f2 = np.abs(stats.skew(IC, bias=False, axis=0))
    f = np.hstack((f1.reshape((len(f1), 1)), f2.reshape((len(f2), 1))))
    fr = f.copy()
    f /= f.max(axis=0)
    norm = np.sqrt(np.dot(f, f.T)).diagonal()

    # remove noisy IC
    ind = norm.argmax()
    IC_ = IC.copy()
    IC_[:, ind] = 0

    # recompute signal
    signalF = np.dot(IC_, A.T)

    return signalF, IC, fr
def usr_moments(coords):
    """
    Calculates the USR moments for a set of input coordinates as well as the
    four USR reference atoms.

    :param coords: numpy.ndarray
    """
    # centroid of the input coordinates
    ctd = coords.mean(axis=0)

    # get the distances to the centroid
    dist_ctd = distance_to_point(coords, ctd)

    # get the closest and furthest coordinate to/from the centroid
    cst, fct = coords[dist_ctd.argmin()], coords[dist_ctd.argmax()]

    # get the distance distributions for the points that are closest/furthest
    # to/from the centroid
    dist_cst = distance_to_point(coords, cst)
    dist_fct = distance_to_point(coords, fct)

    # get the point that is the furthest from the point that is furthest from
    # the centroid
    ftf = coords[dist_fct.argmax()]
    dist_ftf = distance_to_point(coords, ftf)

    # calculate the first three moments for each of the four distance distributions
    moments = concatenate([(ar.mean(), ar.std(), cbrt(skew(ar)))
                           for ar in (dist_ctd, dist_cst, dist_fct, dist_ftf)])

    # return the USR moments as well as the four points for later re-use
    return (ctd, cst, fct, ftf), moments
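# Hedged usage sketch (not part of the original source): assumes numpy is
# imported as np and that usr_moments' helpers (distance_to_point, cbrt, skew,
# concatenate) are defined in this module as above; the coordinates below are
# random placeholders.
demo_coords = np.random.default_rng(0).random((30, 3))
demo_refs, demo_moments = usr_moments(demo_coords)
# three moments for each of the four reference-point distance distributions
print(len(demo_moments))  # 12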
def _get_reward(self, real_values: dict, i: int):
    """ Get the reward returned after previous action """
    df = pd.read_csv('output.csv', skiprows=[0], sep=';')
    last_return = df['price'].values[-1] / self.init_price - 1
    reward = {'return': last_return}
    if i < 100:  # + 1
        return reward
    #
    returns = self.sim_df.tail(99)['return'].dropna().values + [last_return]
    mu, sigma = norm.fit(returns)
    skew, kurtosis = st.skew(returns), st.kurtosis(returns)
    # autocorr = f_autocorr(np.abs(returns))[0, 1]
    reward.update({
        'mu': mu,
        'sigma': sigma,
        'skew': skew,
        'kurtosis': kurtosis,
        # 'autocorr': autocorr,
    })
    # error = {
    #     k: np.abs((reward[k] - real_values[k])**2 / real_values[k])
    #     for k, v in reward.items() if k != 'return'
    # }
    sub_df = self.df.iloc[i - 100:i]
    error = {
        k: ((reward[k] - sub_df[k].mean()) / sub_df[k].std()) ** 2
        for k, v in reward.items() if k != 'return'
    }
    reward['error'] = -sum(error.values())
    os.remove('output.csv')
    return reward
def get_mean_var_skew_kurt(np_array):
    return {
        "mean": np_array.mean(),
        "var": np_array.var(),
        "skewness": st.skew(np_array),
        "kurtosis": st.kurtosis(np_array),
    }
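# Hedged usage sketch (not part of the original source): assumes numpy as np
# and scipy.stats as st, matching the imports the function above relies on.
sample = np.random.default_rng(1).normal(size=500)
print(get_mean_var_skew_kurt(sample))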
def __get_summary_stats(key: str, data: np.ndarray, coefficient: int) -> dict:
    return {
        key + "_mean": data[:, coefficient].mean(),
        key + "_variance": data[:, coefficient].var(),
        key + "_skewness": st.skew(data[:, coefficient]),
        key + "_kurtosis": st.kurtosis(data[:, coefficient])
    }
def base_stats(data_1):
    stats_dict = np.zeros((data_1.shape[0], 4))
    for i in range(data_1.shape[0]):
        stats_dict[i, 0] = st.skew(data_1[i], bias=False)
        stats_dict[i, 1] = st.kurtosis(data_1[i], bias=False)
        stats_dict[i, 2] = np.max(data_1[i])
        stats_dict[i, 3] = np.std(data_1[i])
    return stats_dict
def extract(self, sourcepc, neighborhood, targetpc, targetindex, volume_description):
    if neighborhood:
        source_data = sourcepc[point][self.data_key]['data'][neighborhood]
        skew = stat.skew(source_data)
    else:
        skew = np.NaN
    return skew
def _extract_one(self, point_cloud, neighborhood):
    if neighborhood:
        source_data = point_cloud[point][self.data_key]['data'][neighborhood]
        skew = stat.skew(source_data)
    else:
        skew = np.NaN
    return skew
def skewness_features(img, pcloud):
    feats = []
    points = pcloud.get_numpy()
    dim = pcloud.dims
    for x_i in range(dim):
        print(points[:, x_i].shape)
        corr_xy = st.skew(points[:, x_i])
        feats.append(corr_xy)
    # print(feats)
    return feats
def skewness_normal_distribution(df, features, crypto_name, output_path):
    res = {'feature': [], 'skewness_of_n_distrib': []}
    for feature in features:
        # df = df.dropna(subset=[feature])
        # stats.skew returns a single value (it is not a test, so there is no p-value)
        stat = stats.skew(df[feature])
        res['feature'].append(feature)
        res['skewness_of_n_distrib'].append(stat)
    pd.DataFrame(data=res).to_csv(output_path + crypto_name + ".csv", sep=",", index=False)
def main():
    """main function"""
    json_string = raw_input()
    # json_string = data1
    # load json data
    parsed_json = json.loads(json_string)
    # histogram record length
    rec_len = len(parsed_json[0]['histogram'])
    # variables declaration
    date_lst = []
    rmse_lst = []
    skewnessList = []
    # loop through the records
    for record in parsed_json:
        # extract the date part
        date = record['date']
        # extract the histogram data part
        histogram = record['histogram']
        # compute the rmse (root mean squared error) for the histogram
        rmse = compute_rmse(histogram)
        # add the computed rmse to a list; this gives us date-wise rmse values
        rmse_lst.append(rmse)
        # add the date to a date list
        date_lst.append(date)
        # compute the skewness of the histogram; skewness is a measure of symmetry.
        # A spurious rise in the skewness value means the distribution on a
        # certain day clearly deviates from how it appeared on the previous one.
        skewnessList.append(st.skew(histogram))
    # compute the standard deviation of the rmse list
    stddev = compute_stddev(rmse_lst)
    # compute two standard deviations of the skewness list
    skew_2stdDev = compute_2stddev(skewnessList)
    # check for regression
    regression_date = check_regression(rmse_lst, stddev, skewnessList,
                                       skew_2stdDev, date_lst)
    # print the regression date if a regression was found, otherwise an empty string
    print regression_date
def get_mfcc_features(filename):
    feature_dict = {}
    (rate, sig) = wav.read(filename)
    if sig.ndim == 2:
        # wav is stereo so average over both channels
        mfcc_feat_chan0 = mfcc(sig[:, 0], rate, numcep=15, appendEnergy=True)
        mfcc_feat_chan1 = mfcc(sig[:, 1], rate, numcep=15, appendEnergy=True)
        mfcc_feat = (mfcc_feat_chan0 + mfcc_feat_chan1) / 2
    else:
        mfcc_feat = mfcc(sig, rate, numcep=15, appendEnergy=True)

    # Velocity is the difference between timestep t+1 and t for each mfcc_feat / 2
    vel = (mfcc_feat[:-1, :] - mfcc_feat[1:, :]) / 2.0
    # Acceleration is the difference between timestep t+1 and t for each velocity / 2
    acc = (vel[:-1, :] - vel[1:, :]) / 2.0

    mfcc_means = []
    for i in xrange(0, 14):
        key = "energy" if i == 0 else "mfcc" + str(i)
        # mfcc
        feature_dict[key + "_mean"] = mfcc_feat[:, i].mean()
        feature_dict[key + "_var"] = mfcc_feat[:, i].var()
        feature_dict[key + "_skewness"] = st.skew(mfcc_feat[:, i])
        feature_dict[key + "_kurtosis"] = st.kurtosis(mfcc_feat[:, i])
        # Vel
        feature_dict[key + "_vel_mean"] = vel[:, i].mean()
        feature_dict[key + "_vel_var"] = vel[:, i].var()
        feature_dict[key + "_vel_skewness"] = st.skew(vel[:, i])
        feature_dict[key + "_vel_kurtosis"] = st.kurtosis(vel[:, i])
        # Accel
        feature_dict[key + "_accel_mean"] = acc[:, i].mean()
        feature_dict[key + "_accel_var"] = acc[:, i].var()
        feature_dict[key + "_accel_skewness"] = st.skew(acc[:, i])
        feature_dict[key + "_accel_kurtosis"] = st.kurtosis(acc[:, i])
        # Need the skewness and kurtosis of all mfcc means
        if i > 0:
            mfcc_means.append(feature_dict[key + "_mean"])
    feature_dict["mfcc_skewness"] = st.skew(mfcc_means)
    feature_dict["mfcc_kurtosis"] = st.kurtosis(mfcc_means)
    return feature_dict
def aggregate_ftr_matrix(self, ftr_matrix):
    sig = []
    for ftr in ftr_matrix:
        # scipy.stats.nanmedian/nanmean/nanstd were removed from SciPy;
        # numpy's nan-aware functions compute the same statistics
        median = np.nanmedian(ftr)
        mean = np.nanmean(ftr)
        std = np.nanstd(ftr)
        # Invalid double scalars warning appears here
        skew = stats.skew(ftr) if any(ftr) else 0.0
        kurtosis = stats.kurtosis(ftr)
        sig.extend([median, mean, std, skew, kurtosis])
    return sig
def startApplication(images):
    trainData = []
    for image in images:
        img = Image.open(image)
        # Grayscale conversion
        imgBW = convertGrayscale(img)
        # Scaling to 100x100
        imgResize = scaleImage(imgBW)
        # Denoising using median filtering
        imgDenoised = denoiseImage(imgResize)
        # Background elimination
        imgBackgroundEliminated = backgroundEliminate(imgDenoised)
        # Signature normalization
        imgNormal = normalizeImage(imgBackgroundEliminated)
        # Thinning image
        imgThin = thinImage(imgNormal, 300)

        # Feature extraction
        #
        # Global features
        # Density feature
        density = getDensityOfImage(imgThin)
        print('Density ', density)
        # Width to height ratio
        widthHeightRatio = getWidthToHeightRatio(imgThin)
        print('Width to height ratio', widthHeightRatio)
        # Slope feature
        slope = getSlope(imgThin)
        print('Slope', slope)
        # Skew feature
        skew = stats.skew(imgThin)
        print('Skew', skew)

        # Constructing train data
        pattern = []
        pattern.append(density)
        pattern.append(widthHeightRatio)
        pattern.append(slope)
        pattern.extend(skew)
        trainData.append(pattern)
    # Training
    train(trainData)
def get_feature(region_props, n_region, feature_name):
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = format_2f(np.max(feature_values))
        feature[MEAN] = format_2f(np.mean(feature_values))
        feature[VARIANCE] = format_2f(np.var(feature_values))
        feature[SKEWNESS] = format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = format_2f(st.kurtosis(np.array(feature_values)))
    return feature
def get_feature(region_props, n_region, feature_name):
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = utils.format_2f(np.max(feature_values))
        feature[MEAN] = utils.format_2f(np.mean(feature_values))
        feature[VARIANCE] = utils.format_2f(np.var(feature_values))
        feature[SKEWNESS] = utils.format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = utils.format_2f(st.kurtosis(np.array(feature_values)))
    return feature
def extract_features_for_pqrst(row, pqrsts):
    features = []

    p = [x[0] for x in pqrsts]
    q = [x[1] for x in pqrsts]
    r = [x[2] for x in pqrsts]
    s = [x[3] for x in pqrsts]
    t = [x[4] for x in pqrsts]

    pqrsts = pqrsts[:min(NB_RR, len(pqrsts))]

    row = low_pass_filtering(row)
    row = high_pass_filtering(row)

    for i in range(len(pqrsts)):
        pq = row[p[i]:q[i]]
        st = row[s[i]:t[i]]
        pt = row[p[i]:t[i]]

        pmax = np.amax(pq)
        pmin = np.amin(pq)
        tmax = np.amax(st)
        tmin = np.amin(st)
        p_mean = np.mean(pq)
        t_mean = np.mean(st)

        features += [
            # features for PQ interval
            pmax,
            pmax / row[r[i]],
            pmin / pmax,
            p_mean,
            p_mean / pmax,
            np.std(pq),
            common.mode(pq),
            # features for ST interval
            tmax,
            tmax / row[r[i]],
            tmin / tmax,
            t_mean,
            t_mean / tmax,
            np.std(st),
            common.mode(st),
            p_mean / t_mean,
            # features for whole PQRST interval
            stats.skew(pt),
            stats.kurtosis(pt)
        ]

    for i in range(NB_RR - len(pqrsts)):
        features += [0 for x in range(17)]

    return features
def get_feature(region_props, n_region, feature_name):
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature = feature_tuple(
            MAX=format_2f(np.max(feature_values)),
            MEAN=format_2f(np.mean(feature_values)),
            VARIANCE=format_2f(np.var(feature_values)),
            SKEWNESS=format_2f(st.skew(np.array(feature_values))),
            KURTOSIS=format_2f(st.kurtosis(np.array(feature_values))))
    else:
        feature = feature_tuple(*([0] * 5))
    return feature
def getFourMoments(sequence, ax=1):
    finalArray = [
        np.mean(sequence, axis=ax),
        np.var(sequence, axis=ax),
        skew(sequence, axis=ax),
        kurtosis(sequence, axis=ax),
        sem(sequence, axis=ax),
    ]
    if ax is not None:
        finalArray = np.array(finalArray)
        finalArray = finalArray.T
        return np.concatenate((finalArray, np.array(mquantiles(sequence, axis=ax))), axis=ax)
    finalArray.extend(mquantiles(sequence, axis=ax))
    return np.array(finalArray)
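# Hedged usage sketch (not part of the original source): assumes numpy as np
# and skew, kurtosis, sem, mquantiles imported as used by the function above.
rows = np.random.default_rng(2).normal(size=(5, 100))
per_row = getFourMoments(rows, ax=1)            # five statistics + three quantiles per row
pooled = getFourMoments(rows.ravel(), ax=None)  # one flat vector of statistics
print(per_row.shape, pooled.shape)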
def _calculateStatistics(self, img, haralick=False, zernike=False):
    result = []
    # 3-bin histogram
    result.extend(mquantiles(img))
    # First four moments
    result.extend([img.mean(), img.var(), skew(img, axis=None), kurtosis(img, axis=None)])
    # Haralick features
    if haralick:
        integerImage = dtype.img_as_ubyte(img)
        result.extend(texture.haralick(integerImage).flatten())
    # Zernike moments
    if zernike:
        result.extend(zernike_moments(img, int(self.rows) / 2 + 1))
    return result
def usr_moments_with_existing(coords, ref_points):
    """
    Calculates the USR moments for a set of coordinates and an already
    existing set of four USR reference points.
    """
    ctd, cst, fct, ftf = ref_points
    dist_ctd = distance_to_point(coords, ctd)
    dist_cst = distance_to_point(coords, cst)
    dist_fct = distance_to_point(coords, fct)
    dist_ftf = distance_to_point(coords, ftf)
    moments = concatenate([(ar.mean(), ar.std(), cbrt(skew(ar)))
                           for ar in (dist_ctd, dist_cst, dist_fct, dist_ftf)])
    return moments
def get_feature(region_props, n_region, feature_name):
    """
    Returns:
        feature: list of [max, mean, variance, skewness, kurtosis]
    """
    feature = [0] * 5
    if n_region > 0:
        feature_values = [region[feature_name] for region in region_props]
        feature[MAX] = utils.format_2f(np.max(feature_values))
        feature[MEAN] = utils.format_2f(np.mean(feature_values))
        feature[VARIANCE] = utils.format_2f(np.var(feature_values))
        feature[SKEWNESS] = utils.format_2f(st.skew(np.array(feature_values)))
        feature[KURTOSIS] = utils.format_2f(st.kurtosis(np.array(feature_values)))
    return feature
def generate_moment(dataset, NO_OF_PROPERTIES, NO_MOMENTS):
    element_count = len(dataset)
    moments = np.zeros((element_count, NO_OF_PROPERTIES, NO_MOMENTS))
    # TODO debugging here only
    for row in range(element_count):
        # scipy.mean/scipy.std were removed from SciPy; numpy provides the same functions
        moments[row, :, :] = np.array([
            np.mean(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            np.std(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            stats.skew(dataset[row][0:NO_OF_PROPERTIES, :], axis=1),
            stats.kurtosis(dataset[row][0:NO_OF_PROPERTIES, :], axis=1)
        ]).transpose()
    return moments
def compute_features(dataframe, columns, bins, model, model_type="KMeans"):
    """
    Compute the features of the specified columns from a Pandas dataframe using the given model.

    :param dataframe: Pandas dataframe.
    :param columns: List of the column names.
    :param bins: Number of histogram bins for each column.
    :param model: Model.
    :param model_type: Type of the model.
    :return: Features.
    """
    import numpy as np
    import scipy.stats.stats as st

    row = []
    for j, column in enumerate(columns):
        column_df = dataframe[column]
        X = column_df.values
        if model is not None:
            if model_type == "KMeans":
                r = model[column].predict(X.reshape(-1, 1))
            if model_type == "PolynomialFeatures":
                r = model[column].transform(X.reshape(-1, 1)).tolist()
        else:
            r = X

        # compute feature histogram
        # counts, bin_edges = np.histogram(result, bins=bins[j], density=False)
        # column_hist = counts

        # compute normalized feature histogram
        counts, bin_edges = np.histogram(r, bins=bins[j], density=True)
        column_hist = counts * np.diff(bin_edges)
        row.extend(column_hist)

        # add extra features
        kurtosis = st.kurtosis(X.reshape(-1, 1))[0]
        skew = st.skew(X.reshape(-1, 1))[0]
        min_value = column_df.min()
        max_value = column_df.max()
        mean_value = column_df.mean()
        median_value = column_df.median()
        row.extend([kurtosis, skew, min_value, max_value, mean_value, median_value])
    return row
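# Hedged usage sketch (not part of the original source): calls compute_features
# with model=None, so no fitted KMeans/PolynomialFeatures model is required;
# the column names and bin counts are made up for illustration.
import numpy as np
import pandas as pd
demo_df = pd.DataFrame({"a": np.random.default_rng(3).normal(size=200),
                        "b": np.random.default_rng(4).random(200)})
demo_row = compute_features(demo_df, columns=["a", "b"], bins=[10, 10], model=None)
print(len(demo_row))  # 10 histogram bins + 6 extra statistics per column = 32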
def extract_features(data, y, window_len, task2=False):  # num_windows):
    i = 0
    # window_len = len(data)/(num_windows/2)
    if task2:
        num_windows = len(data) - window_len + 1
    else:
        # integer division so the window count can be used with range()
        num_windows = len(data) // (window_len // 2)
    # print 'num_windows = 208, window_len = ', str(len(data)/(208/2))
    # print 'now num_windows = ' + str(num_windows) + ', window_len = ' + str(window_len)
    features = []
    targets = []
    for n in range(num_windows):
        win = data[i:i + window_len]
        if task2:
            target = y.iloc[i]
        else:
            try:
                target = int(y[i:i + window_len].mode())
            except:
                target = int(y[i:i + window_len])
        targets.append(target)
        for c in data.columns:
            s = np.array(win[c])
            rms_val = rms(s)
            (min_max, peak, peaknum) = min_max_mean(s)
            mean = s.mean()
            std = s.std()
            skew = st.skew(s)
            kurtosis = st.kurtosis(s)
            coefficients = std / mean
            logpower = np.log10((s ** 2)).sum()
            new_features = [rms_val, min_max, mean, std, skew, kurtosis,
                            peak, peaknum, coefficients, logpower]
            # new_features = [rms_val, min_max, mean, std]
            features.append(new_features)
        if task2:
            i += 1
        else:
            i += window_len // 2
    features = np.array(features)
    features.shape = num_windows, 120  # 48 # 72
    targets = np.array(targets)
    return features, targets
def usr_moments_with_existing(coords, ref_points, number_of_moments=3, mean=0):
    """
    :param coords:
    :param ref_points:
    :param number_of_moments:
    :param mean: index in [np.mean, geometrical_mean, harmonical_mean]
    :return:
    """
    n_dimension = coords.shape[1]
    center = np.mean(coords)

    # get distance matrix where rows are pivot points and columns are data points
    dist_to_centroid = np.array(
        [[np.linalg.norm(coords[j] - center) for j in range(coords.shape[0])]])
    # distance_matrix lives in scipy.spatial (requires `import scipy.spatial`), not in numpy
    dist_matrix = scipy.spatial.distance_matrix(ref_points, coords)

    # aggregate the symmetric pivots
    if mean not in [0, 1, 2]:
        mean = 0
    mean_options = [np.mean, geometrical_mean, harmonical_mean]
    mean = mean_options[mean]
    dist_ufsr = np.array([
        mean(dist_matrix[i], dist_matrix[n_dimension + i])
        for i in range(n_dimension)
    ])

    # add the distance to the center of mass; dist_ufsr is now a matrix whose rows are
    # the distance distributions wrt (000), mean(100,-100), mean(010,0-10), mean(001,00-1)
    dist_ufsr = np.concatenate((dist_to_centroid, dist_ufsr))

    # get the features
    means = np.array([[np.mean(dist_ufsr[i]) for i in range(n_dimension + 1)]])
    means = np.transpose(means)
    # moments = np.array(
    #     [[scipy.stats.moment(dist_ufsr[i], j) for j in range(2, number_of_moments + 1)]
    #      for i in range(n_dimension + 1)])
    # FIXME: VARIANCE VS STANDARD DEVIATION; include also other moments to use number_of_moments
    moments = np.array([[dist_ufsr[i].std(), cbrt(skew(dist_ufsr[i]))]
                        for i in range(n_dimension + 1)])
    ufsr_feature = np.concatenate((means, moments), axis=1)
    # mean, moment2, moment3, ..., moment6, mean, moment1, ... for each pivot
    ufsr_feature = ufsr_feature.ravel()
    return ufsr_feature
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("segments_filename")
    args = parser.parse_args()

    segment_boundaries = np.loadtxt(args.segments_filename, usecols=(2, 3))
    segment_lengths = segment_boundaries[:, 1] - segment_boundaries[:, 0]
    count = len(segment_lengths)
    mean = np.mean(segment_lengths)
    median = np.median(segment_lengths)

    print("num segments read: {:d}".format(count))
    print("total time (h): {:.2f}".format(np.sum(segment_lengths) / 3600))
    print("mean (s): {:.2f}".format(mean))
    print("median (s): {:.2f}".format(median))
    print("skew: {:.2f}".format(st.skew(segment_lengths, bias=True)))
    print("skew [corrected]: {:.2f}".format(st.skew(segment_lengths, bias=False)))
    print("skewtest: {}".format(st.skewtest(segment_lengths)))
    print("kurtosis: {:.2f}".format(st.kurtosis(segment_lengths)))

    # Figure out how many segments would fill the desired number of hours,
    # then round to the nearest 10k.
    possible_num_hours_segmentations = (100, 300, 500, 1000, 1500, 3000)
    print("=== from mean ===")
    for num_hours in possible_num_hours_segmentations:
        num_segments = int(num_hours * 3600 / mean)
        print("{:d} h: {:d} ({:d}) segments".format(num_hours, round(num_segments, -4), num_segments))
    print("=== from median ===")
    for num_hours in possible_num_hours_segmentations:
        num_segments = int(num_hours * 3600 / median)
        print("{:d} h: {:d} ({:d}) segments".format(num_hours, round(num_segments, -4), num_segments))
import numpy as np
import matplotlib.pyplot as plt
from criticality import *
import scipy.stats.stats as st

xt = np.genfromtxt('xt.csv', delimiter=';')
phit = np.tanh(xt)
print xt.shape

s = fr2spike(phit, 0.1)
sau = SimActiveUnits(s)
av = avalancheSize(sau)

print np.mean(sau)
print np.std(sau)
print st.skew(sau, bias=False)

plt.hist(sau, bins=50)
plt.show()
tempc, templ = temp.shape
for n in xrange(0, 600 - 1):
    for m in xrange(0, 2):
        WAVEFORMLENGTH_a[m] = WAVEFORMLENGTH_a[m] + (-temp[n][m] + temp[n + 1][m])

temp = list(temp.values)
# print len(find(np.diff(np.sign(temp[0][:]))))
for o in xrange(0, 2):
    ZEROCROSSINGS_a[o] = len(find(np.diff(np.sign(temp[o][0:599]))))
    SLOPECHANGES_a[o] = len(find(np.diff(np.sign(np.diff(temp[o][0:599])))))
    SKEWNESS_a[o] = st.skew(temp[o][0:599])
    HJORTHPARAM_activity_a[o] = np.var(temp[o][0:599])
    HJORTHPARAM_mobility_a[o] = np.sqrt(np.var(np.diff(temp[o][0:599])) / np.var(temp[o][0:599]))
    HJORTHPARAM_complexity_a[o] = (np.sqrt(np.var(np.diff(np.diff(temp[o][0:599]))))
                                   / np.var(np.diff(temp[o][0:599]))) \
        / np.sqrt(np.var(np.diff(temp[o][0:599])) / np.var(temp[o][0:599]))

ZEROCROSSINGS_a = ZEROCROSSINGS_a.transpose()
SLOPECHANGES_a = SLOPECHANGES_a.transpose()
SKEWNESS_a = SKEWNESS_a.transpose()
HJORTHPARAM_activity_a = HJORTHPARAM_activity_a.transpose()
HJORTHPARAM_mobility_a = HJORTHPARAM_mobility_a.transpose()
HJORTHPARAM_complexity_a = HJORTHPARAM_complexity_a.transpose()
WAVEFORMLENGTH_a = WAVEFORMLENGTH_a.transpose()

# Concatenate the attributes to form the data matrix
data = []
for i in xrange(0, 2):
def signal_stats(signal=None):
    """Compute various metrics describing the signal.

    Parameters
    ----------
    signal : array
        Input signal.

    Returns
    -------
    mean : float
        Mean of the signal.
    median : float
        Median of the signal.
    max : float
        Maximum signal amplitude.
    var : float
        Signal variance (unbiased).
    std_dev : float
        Standard signal deviation (unbiased).
    abs_dev : float
        Absolute signal deviation.
    kurtosis : float
        Signal kurtosis (unbiased).
    skew : float
        Signal skewness (unbiased).
    """
    # check inputs
    if signal is None:
        raise TypeError("Please specify an input signal.")

    # ensure numpy
    signal = np.array(signal)

    # mean
    mean = np.mean(signal)

    # median
    median = np.median(signal)

    # maximum amplitude
    maxAmp = np.abs(signal - mean).max()

    # variance
    sigma2 = signal.var(ddof=1)

    # standard deviation
    sigma = signal.std(ddof=1)

    # absolute deviation
    ad = np.sum(np.abs(signal - median))

    # kurtosis
    kurt = stats.kurtosis(signal, bias=False)

    # skewness
    skew = stats.skew(signal, bias=False)

    # output
    args = (mean, median, maxAmp, sigma2, sigma, ad, kurt, skew)
    names = ('mean', 'median', 'max', 'var', 'std_dev', 'abs_dev',
             'kurtosis', 'skewness')

    return utils.ReturnTuple(args, names)
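# Hedged usage sketch (not part of the original source): assumes this module's
# own imports (numpy as np, scipy.stats as stats, and the local utils module
# providing ReturnTuple) are available; the input is a synthetic noisy sine.
demo_signal = np.sin(np.linspace(0, 10, 1000)) + 0.1 * np.random.default_rng(5).normal(size=1000)
print(signal_stats(signal=demo_signal))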
def evaluate(self, t):
    """
    Return the short-term skewness at time t.
    """
    d = self.asset.getPreviousData(t, self.__length)
    return stats.skew(d)
# (fragment: the first block appears to continue a loop over the data columns
# that tracks the global minimum and maximum)
    if maxInterim > maxValue:
        maxValue = maxInterim
    minInterim = min(my_data[:, x])
    if minInterim < minValue:
        minValue = minInterim

binWidth = (maxValue - minValue) / numBins
newBins = np.arange(minValue, maxValue, binWidth)

# TODO process array only once for speedup?
for x in range(0, numModels):
    # matplotlib removed the normed keyword; density=True gives the normalized histogram
    frequency = plt.hist(my_data[:, x], bins=newBins, histtype='step',
                         density=True, label=labels[x])
    b[x, 0] = mean(my_data[:, x])
    b[x, 1] = var(my_data[:, x])
    b[x, 2] = skew(my_data[:, x])
    b[x, 3] = kurtosis(my_data[:, x])
    b[x, 4] = entropy(frequency[0])

plt.title(csvString + " Frequency")
plt.legend()
deg = u'\N{DEGREE SIGN}'
plt.xlabel("Airflow Rate (cfm)")
plt.ylabel("Frequency")

for i in range(0, 5):
    print(b[:, i])
plt.show()