def get_global_feature(self):
    """
    Get the global time-domain features of the signal.

    The signal is pre-processed with a Hanning window and cropping enabled
    (``self.pre_process(method='hanning', ifcrop=True)``). Only the second
    and third values returned by that call are used; the first is discarded.

    :return: ``np.hstack`` of, in order: the mean of the normalised energy,
        the upper rate of the energy, twelve statistics of the third
        pre-processing output and the same twelve statistics of the energy
    """
    _, frame_energy, frame_azrate = self.pre_process(method='hanning',
                                                     ifcrop=True)

    def describe(x):
        """Return the twelve summary statistics collected for one sequence."""
        return [
            # feature_calc.autocorrelation(norm(x), 5),
            np.std(x),
            feature_calc.approximate_entropy(norm(x), 5, 1),
            feature_calc.cid_ce(x, normalize=True),
            feature_calc.count_above_mean(x),
            feature_calc.first_location_of_minimum(x),
            feature_calc.first_location_of_maximum(x),
            feature_calc.last_location_of_maximum(x),
            feature_calc.last_location_of_minimum(x),
            feature_calc.longest_strike_above_mean(x),
            feature_calc.number_crossing_m(x, 0.8 * np.max(x)),
            feature_calc.skewness(x),
            feature_calc.time_reversal_asymmetry_statistic(x, 5),
        ]

    energy_upper_rate = self.get_upper_rate(frame_energy)
    parts = [
        [np.mean(norm(frame_energy))],
        [energy_upper_rate],
        describe(frame_azrate),
        describe(frame_energy),
    ]
    return np.hstack(parts)
def get_sta_features(self, data):
    """
    Calculate nine selected statistical features of a series.

    :param data: sequence of numeric samples
    :return: list ``[abs_energy, binned_entropy (max_bins=5),
        autocorrelation (lag=4), skewness, kurtosis, trend, mean, minimum,
        maximum]``
    """

    def _slope(series):
        """Return the coefficient of a linear regression of the samples on their index."""
        sample_index = np.arange(len(series)).reshape(-1, 1)
        targets = np.array(series).reshape(-1, 1)
        model = linear_model.LinearRegression()
        model.fit(sample_index, targets)
        return model.coef_[0][0]

    # The list literal is evaluated left to right, i.e. in the reported order.
    return [
        ts.abs_energy(data),
        ts.binned_entropy(data, max_bins=5),
        ts.autocorrelation(data, lag=4),
        ts.skewness(data),
        ts.kurtosis(data),
        _slope(data),
        ts.mean(data),
        ts.minimum(data),
        ts.maximum(data),
    ]
def time_series_skewness(x):
    """
    Compute the skewness feature of a time series.

    Thin wrapper that forwards ``x`` to ``ts_feature_calculators.skewness``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    skew_value = ts_feature_calculators.skewness(x)
    return skew_value
def time_series_skewness(x):
    """
    Return the sample skewness of ``x``.

    The value is calculated with the adjusted Fisher-Pearson standardized
    moment coefficient G1; the computation itself is delegated to
    ``ts_feature_calculators.skewness``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    sample_skewness = ts_feature_calculators.skewness(x)
    return sample_skewness
def TS_features11(signal):
    """
    Compute five features of ``signal`` with the ``ts`` calculators.

    :param signal: the series to analyse
    :return: tuple ``(percentage of reoccurring datapoints, percentage of
        reoccurring values, ratio of value number to series length,
        sample entropy, skewness)``
    """
    return (
        ts.percentage_of_reoccurring_datapoints_to_all_datapoints(signal),
        ts.percentage_of_reoccurring_values_to_all_values(signal),
        ts.ratio_value_number_to_time_series_length(signal),
        ts.sample_entropy(signal),
        ts.skewness(signal),
    )
def extract_feats(ts):
    """
    Summarise a series as a vector of nine features.

    :param ts: the series to analyse
    :return: ``numpy.ndarray`` holding, in order: standard deviation,
        kurtosis, skewness, count above mean, count below mean, longest
        strike above mean, longest strike below mean, Fourier entropy
        (``bins=1000000``) and absolute energy
    """
    features = [
        fc.standard_deviation(ts),
        fc.kurtosis(ts),
        fc.skewness(ts),
        fc.count_above_mean(ts),
        fc.count_below_mean(ts),
        fc.longest_strike_above_mean(ts),
        fc.longest_strike_below_mean(ts),
        fc.fourier_entropy(ts, bins=1000000),
        fc.abs_energy(ts),
    ]
    return np.array(features)
def get_mfcc_feature(self, hadcropped=False):
    """
    Calculate Mel-frequency cepstral coefficients in the frequency domain
    and extract statistical features from them.

    :param hadcropped: when False, the windowed frames are cropped to the
        range returned by ``self.get_boundary`` before the FFT; when True the
        FFT is applied to all windowed frames
    :return: numpy array, the ``np.hstack`` of twelve statistics for every
        MFCC column
    """
    # NOTE(review): the condition rejects 32/64/128/256 while the message
    # says the value should be one of them - confirm which one is intended.
    assert self.frame_per_second not in [32, 64, 128, 256], \
        Exception("Cannot operate butterfly computation ,"
                  "frame per second should in [32, 64, 128, 256]")

    window = self.get_window(method='hanning')
    frames = self._add_window(window, self.meta_audio_data)  # [num_frame, kernel_size]
    frame_energy = self.get_energy(self.meta_audio_data, window)

    if hadcropped:
        spectrum = np.vstack([fft.fft(frames)])
    else:
        boundary = self.get_boundary(frame_energy)
        cropped = frames[boundary[0]: boundary[1] + 1, :]
        # Transform the cropped frames one row at a time.
        single_frames = np.vsplit(cropped, len(cropped))
        spectrum = np.vstack(
            [fft.fft(frame.squeeze()) for frame in single_frames])

    spectrum = np.abs(spectrum)
    spectrum_energy = spectrum ** 2

    low_freq = self.sr / self.num_per_frame
    high_freq = self.sr
    mel_filters = self._mfcc_filter(self.mfcc_cof, low_freq, high_freq)
    mel_energy = np.dot(spectrum_energy, mel_filters.transpose())  # (F, M)
    cosine_basis = self._discrete_cosine_transform()
    mfcc_raw = np.sqrt(2 / self.mfcc_cof) * np.dot(mel_energy, cosine_basis)  # (F, N)

    upper = [self.get_upper_rate(column) for column in mfcc_raw.transpose()]
    assert len(upper) == mfcc_raw.shape[1]

    def describe(x):
        """Return the twelve summary statistics collected for one MFCC column."""
        return [
            # feature_calc.autocorrelation(norm(x), 5),
            np.std(x),
            feature_calc.approximate_entropy(norm(x), 5, 1),
            feature_calc.cid_ce(x, normalize=True),
            feature_calc.count_above_mean(x),
            feature_calc.first_location_of_minimum(x),
            feature_calc.first_location_of_maximum(x),
            feature_calc.last_location_of_maximum(x),
            feature_calc.last_location_of_minimum(x),
            feature_calc.longest_strike_above_mean(x),
            feature_calc.number_crossing_m(x, 0.8 * np.max(x)),
            feature_calc.skewness(x),
            feature_calc.time_reversal_asymmetry_statistic(x, 5),
        ]

    per_column = [describe(column) for column in mfcc_raw.transpose()]
    return np.hstack(per_column)
def transform(self, value):
    """
    Turn a series into a list of six summary features.

    :param value: the series to summarise, or ``None``
    :return: ``[abs_energy, kurtosis, mean_abs_change, skewness,
        share of samples above the mean, share of samples below the mean]``,
        or ``None`` when ``value`` is ``None`` or a feature cannot be
        computed
    """
    if value is None:
        return None
    # TODO: remove try-except and validate value in order to avoid exception
    try:
        return [
            abs_energy(value),
            kurtosis(value),
            mean_abs_change(value),
            skewness(value),
            count_above_mean(value) / len(value),
            count_below_mean(value) / len(value),
        ]
    except Exception:
        # Best effort: an uncomputable series yields no features. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of being swallowed.
        return None
def scalar_feature_extraction(column):
    """
    Pack ten scalar statistics of a column into a ``(1, 10)`` float array.

    Slot order: count above mean, mean, maximum, median, minimum, sample
    entropy, skewness, variance, longest strike above mean, longest strike
    below mean.

    :param column: object exposing its samples as ``column.values``
    :return: ``numpy.ndarray`` of shape ``(1, 10)``
    """
    samples = column.values
    calculators = (
        tffe.count_above_mean,
        tffe.mean,
        tffe.maximum,
        tffe.median,
        tffe.minimum,
        tffe.sample_entropy,
        tffe.skewness,
        tffe.variance,
        tffe.longest_strike_above_mean,
        tffe.longest_strike_below_mean,
    )

    features = np.zeros([1, 10], dtype=float)
    for slot, calculator in enumerate(calculators):
        features[0][slot] = calculator(samples)

    # A NaN sample entropy (slot 5) is stored as 0.
    if isNaN(features[0][5]):
        features[0][5] = 0
    return features
def skewness(self):
    """
    Compute the skewness of every row of ``self.data``.

    :return: ``numpy.ndarray`` with one ``tcal.skewness`` value per row
    """
    per_row = []
    for row_index in range(len(self.data)):
        per_row.append(tcal.skewness(self.data[row_index, :]))
    return np.array(per_row)
def skewness(current_observation: pd.DataFrame, raw_key: str):
    """
    Compute the skewness of one column of an observation frame.

    :param current_observation: frame holding the observed series
    :param raw_key: name of the column to evaluate
    :return: the value returned by ``tsf.skewness`` for that column
    """
    series = current_observation[raw_key]
    return tsf.skewness(series)
def _top_fft_magnitudes(row):
    """Return the three largest FFT magnitudes of ``row`` in descending order."""
    magnitudes = abs(fft(row))
    magnitudes.sort()
    return np.flip(magnitudes)[0:3]


def _cgm_features(df):
    """Build the feature frame (one row per series) for one data set."""
    features = pd.DataFrame()

    # 1. distance between the positions of the maximum and the minimum
    features['diffTime'] = df.apply(
        lambda row: abs(row.idxmax() - row.idxmin()), axis=1)

    # 2. value range (max - min)
    features['diffGlucose'] = df.apply(
        lambda row: row.max() - row.min(), axis=1)

    # 3. three largest FFT magnitudes
    fft_peaks = df.apply(_top_fft_magnitudes, axis=1)
    fft_frame = pd.DataFrame(fft_peaks.tolist(),
                             columns=['FFT1', 'FFT2', 'FFT3'])
    features = features.merge(fft_frame, left_index=True, right_index=True)

    # 4. mean absolute difference between consecutive samples
    velocities = []
    for _, row in df.iterrows():
        readings = row.tolist()
        steps = [abs(nxt - prev) for prev, nxt in zip(readings, readings[1:])]
        velocities.append(np.round(np.mean(steps), 2))
    features['CGMVelocity'] = np.asarray(velocities, dtype=float)

    # 4b. position of the maximum
    features['maxTime'] = df.apply(lambda row: row.idxmax(), axis=1)

    # 5. skewness and 7. kurtosis. Whole columns are assigned at once:
    # chained assignment (``frame['col'][i] = value``) is unreliable and does
    # nothing under pandas Copy-on-Write.
    features['Skewness'] = np.asarray(
        [ts.skewness(row) for _, row in df.iterrows()], dtype=float)
    features['Kurt'] = np.asarray(
        [ts.kurtosis(np.array(row)) for _, row in df.iterrows()], dtype=float)
    return features


def createFeature(np_meal_data, np_no_meal_data):
    """
    Create the feature frames for the meal and the no-meal data.

    Both inputs are converted with ``pd.DataFrame`` (one series per row) and
    run through the same pipeline. The resulting columns are, in order:
    ``diffTime``, ``diffGlucose``, ``FFT1``, ``FFT2``, ``FFT3``,
    ``CGMVelocity``, ``maxTime``, ``Skewness`` and ``Kurt``. The sample
    entropy feature (sixth feature in the original numbering) stays disabled.

    :param np_meal_data: 2-D data of the meal series
    :param np_no_meal_data: 2-D data of the no-meal series
    :return: tuple ``(feature_meal, feature_no_meal)`` of DataFrames
    """
    feature_meal = _cgm_features(pd.DataFrame(np_meal_data))
    feature_no_meal = _cgm_features(pd.DataFrame(np_no_meal_data))
    return feature_meal, feature_no_meal
def main():
    """
    Write force features of every ``*.txt`` file in the current directory
    into the ``SummaryPatients`` sheet of ``Summary.xlsx``.

    Each input file gets one sheet row (the first file goes to row 2):
    column A is the running file number, column B is 1 when the file name
    contains ``'Pt'`` and 0 otherwise, columns C-N hold abs energy, kurtosis,
    skewness, median, mean and variance for the left/right total force
    (whitespace-separated fields 17 and 18 of each line), and columns O-R /
    S-V hold the aggregated FFT centroid, variance, skew and kurtosis of the
    left / right force. The workbook is saved once all files are processed.
    """
    dirname = os.path.realpath('.')
    workbook_path = os.path.join(dirname, 'Summary.xlsx')
    workbook = openpyxl.load_workbook(workbook_path)
    worksheet = workbook['SummaryPatients']

    # (calculator, left-force column, right-force column)
    scalar_layout = (
        (tf.abs_energy, 'C', 'D'),
        (tf.kurtosis, 'E', 'F'),
        (tf.skewness, 'G', 'H'),
        (tf.median, 'I', 'J'),
        (tf.mean, 'K', 'L'),
        (tf.variance, 'M', 'N'),
    )
    fft_params = [
        {"aggtype": "centroid"},
        {"aggtype": "variance"},
        {"aggtype": "skew"},
        {"aggtype": "kurtosis"},
    ]

    paths = glob.glob(os.path.join(dirname, '*.txt'))
    for file_no, filename in enumerate(paths, start=1):
        force_left = []
        force_right = []
        # The context manager closes the input file even if parsing fails.
        with open(filename, 'r') as data:
            for line in data:
                fields = line.split()
                force_left.append(float(fields[17]))
                force_right.append(float(fields[18]))

        row = str(file_no + 1)
        worksheet['A' + row] = file_no
        # Test only the file name: a 'Pt' somewhere in the directory path
        # must not mark every file as a patient recording.
        worksheet['B' + row] = 1 if 'Pt' in os.path.basename(filename) else 0

        for calculator, left_col, right_col in scalar_layout:
            worksheet[left_col + row] = calculator(force_left)
            worksheet[right_col + row] = calculator(force_right)

        for columns, signal in (('OPQR', force_left), ('STUV', force_right)):
            aggregated = tf.fft_aggregated(signal, fft_params)
            # Each item is indexed like the original code: position 1 is the value.
            for column, item in zip(columns, aggregated):
                worksheet[column + row] = item[1]

    workbook.save(workbook_path)
def get_skew(arr):
    """
    Return the skewness of ``arr`` as a one-element array.

    The value is passed through ``np.nan_to_num`` before it is returned.

    :param arr: the series to analyse
    :return: ``numpy.ndarray`` with a single element
    """
    wrapped = np.array([skewness(arr)])
    return np.nan_to_num(wrapped)
np.array(dataset.iloc[i, :]), 2) #CGM Velocity feat_dataset['CGM_Velocity'] = np.nan for i in range(len(dataset)): c_list = dataset.loc[i, :].tolist() sum_ = [] for j in range(1, len(c_list)): sum_.append(abs(c_list[j] - c_list[j - 1])) feat_dataset['CGM_Velocity'][i] = np.round(np.mean(sum_), 2) #MinMax feat_dataset['CGM_MinMax'] = np.nan feat_dataset['CGM_MinMax'] = feat_dataset['CGM_Max'] - feat_dataset['CGM_Min'] ##SKewness feat_dataset['CGM_Skewness'] = np.nan for i in range(len(dataset)): feat_dataset['CGM_Skewness'][i] = ts.skewness(dataset.loc[i, :]) #CGM_Displacement feat_dataset['CGM_Displacement'] = np.nan for i in range(len(dataset)): c_list = dataset.loc[i, :].tolist() sum_ = [] for j in range(1, len(c_list)): sum_.append(abs(c_list[j] - c_list[j - 1])) feat_dataset['CGM_Displacement'][i] = np.round(np.sum(sum_), 2) #CGM_Kurtosis feat_dataset['CGM_Kurtosis'] = np.nan for i in range(len(dataset)): feat_dataset['CGM_Kurtosis'][i] = ts.kurtosis(np.array(dataset.iloc[i, :])) #Recurr feat_dataset['CGM_Recur'] = np.nan for i in range(len(dataset)):
def main():
    """
    Compute force features for the recording ``GaPt07_01.txt`` located in
    the current directory.

    The left/right total force is read from whitespace-separated fields 17
    and 18 of every line. Abs energy, kurtosis, skewness, median, mean and
    variance plus the aggregated FFT centroid, variance, skew and kurtosis
    are collected per side in a dictionary.
    """
    dirname = os.path.realpath('.')
    filename = os.path.join(dirname, 'GaPt07_01.txt')

    force_left = []
    force_right = []
    # The context manager closes the input file even if parsing fails.
    with open(filename, 'r') as data:
        for line in data:
            fields = line.split()
            force_left.append(float(fields[17]))
            force_right.append(float(fields[18]))

    scalar_calculators = (
        ("absEnergy", tf.abs_energy),
        ("kurtosis", tf.kurtosis),
        ("skewness", tf.skewness),
        ("median", tf.median),
        ("mean", tf.mean),
        ("variance", tf.variance),
    )
    fft_params = [
        {"aggtype": "centroid"},
        {"aggtype": "variance"},
        {"aggtype": "skew"},
        {"aggtype": "kurtosis"},
    ]
    fft_labels = ("fftCentroid", "fftVariance", "fftSkew", "fftKurtosis")

    # NOTE(review): the dictionary is neither returned nor stored, exactly
    # as in the original code - confirm what should happen with the result.
    extracted_features = {}
    for label, calculator in scalar_calculators:
        extracted_features[label + "L"] = calculator(force_left)
        extracted_features[label + "R"] = calculator(force_right)

    for side, signal in (("L", force_left), ("R", force_right)):
        aggregated = tf.fft_aggregated(signal, fft_params)
        # The whole item yielded by fft_aggregated is stored, not only its value.
        for label, item in zip(fft_labels, aggregated):
            extracted_features[label + side] = item
def feat_extraction(dataset):
    """
    Extract ten features from every row of ``dataset``.

    Rows are read by position, so the row labels of ``dataset`` do not
    matter. The result is built in one pass rather than by chained assignment
    (``frame['col'][i] = value``), which is unreliable and does nothing under
    pandas Copy-on-Write.

    :param dataset: DataFrame holding one series per row
    :return: float DataFrame indexed ``0..len(dataset)-1`` with the columns
        ``CGM_Entropy`` (sample entropy), ``CGM_RMS``, ``CGM_Correlation``
        (autocorrelation, lag 1), ``CGM_Peaks`` (number of peaks, support 2),
        ``CGM_Velocity`` (mean absolute step, 2 decimals), ``CGM_MinMax``
        (max - min), ``CGM_Skewness``, ``CGM_Displacement`` (sum of absolute
        steps, 2 decimals), ``CGM_Kurtosis`` and ``CGM_Recur`` (ratio of
        value number to series length)
    """
    column_order = [
        'CGM_Entropy', 'CGM_RMS', 'CGM_Correlation', 'CGM_Peaks',
        'CGM_Velocity', 'CGM_MinMax', 'CGM_Skewness', 'CGM_Displacement',
        'CGM_Kurtosis', 'CGM_Recur'
    ]

    value_range = (dataset.max(axis=1) - dataset.min(axis=1)).to_numpy()

    records = []
    for position, (_, row) in enumerate(dataset.iterrows()):
        samples = np.array(row)
        readings = row.tolist()
        abs_steps = [
            abs(nxt - prev) for prev, nxt in zip(readings, readings[1:])
        ]
        records.append([
            ts.sample_entropy(samples),
            np.sqrt(np.mean(row ** 2)),
            ts.autocorrelation(samples, 1),
            ts.number_peaks(samples, 2),
            np.round(np.mean(abs_steps), 2),
            value_range[position],
            ts.skewness(row),
            np.round(np.sum(abs_steps), 2),
            ts.kurtosis(samples),
            ts.ratio_value_number_to_time_series_length(samples),
        ])

    return pd.DataFrame(records,
                        columns=column_order,
                        index=np.arange(len(dataset)),
                        dtype=float)