def get_features_from_one_signal(X, sample_rate=50):
    """Build the 18-element feature vector for a single 1-D signal.

    :param X: one-dimensional array holding the signal samples.
    :param sample_rate: value passed as the second argument of
        ``fc.autocorrelation``; one tenth of it (truncated to an int) is
        passed as the second argument of ``fc.number_peaks``.
    :return: list of 12 scalar features followed by the six values that
        ``fc.spkt_welch_density`` yields for coefficients 1 through 6.
    :raises AssertionError: if ``X`` is not one-dimensional.
    """
    assert X.ndim == 1, "Expected single signal in feature extraction"

    signal_mean = np.mean(X)
    features = [
        signal_mean,
        np.std(X),
        fc.abs_energy(X),
        fc.absolute_sum_of_changes(X),
        fc.autocorrelation(X, sample_rate),
        fc.count_above_mean(X),
        fc.count_below_mean(X),
        fc.kurtosis(X),
        fc.longest_strike_above_mean(X),
        # Crossings are counted around the signal mean, not around zero.
        fc.number_crossing_m(X, signal_mean),
        fc.number_peaks(X, int(sample_rate / 10)),
        fc.sample_entropy(X),
    ]

    welch_params = [{"coeff": coeff} for coeff in range(1, 7)]
    welch_pairs = fc.spkt_welch_density(X, welch_params)
    # Each result is a (label, value) pair; only the values are kept.
    welch_values = np.asarray([value for _, value in welch_pairs])
    features += [welch_values[k] for k in range(6)]
    return features
def get_sta_features(self, data):
    """
    Calculate the nine selected statistical features of a series.

    :param data: the series to describe
    :return: list ``[energy, binned entropy (5 bins), autocorrelation at
        lag 4, skewness, kurtosis, linear trend, mean, minimum, maximum]``
    """

    def _linear_slope(series):
        # Slope of an ordinary least-squares line fitted against the
        # sample positions 0..len(series)-1.
        positions = np.arange(len(series)).reshape(-1, 1)
        targets = np.array(series).reshape(-1, 1)
        model = linear_model.LinearRegression()
        model.fit(positions, targets)
        return model.coef_[0][0]

    return [
        ts.abs_energy(data),
        ts.binned_entropy(data, max_bins=5),
        ts.autocorrelation(data, lag=4),
        ts.skewness(data),
        ts.kurtosis(data),
        _linear_slope(data),
        ts.mean(data),
        ts.minimum(data),
        ts.maximum(data),
    ]
def time_series_kurtosis(x):
    """
    Return the kurtosis of x as computed by
    ``ts_feature_calculators.kurtosis``.

    NOTE(review): the exact estimator (e.g. Fisher vs. Pearson definition)
    is decided by ``ts_feature_calculators`` -- confirm against its
    documentation.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :rtype: float
    """
    return ts_feature_calculators.kurtosis(x)
def TS_features2(signal):
    """Compute duplicate flags, kurtosis and longest-strike lengths.

    :param signal: the series handed to every ``ts`` calculator.
    :return: tuple ``(has_duplicate, has_duplicate_max, has_duplicate_min,
        kurtosis, longest_strike_above_mean, longest_strike_below_mean)``.
    """
    calculators = (
        ts.has_duplicate,  # t.f
        ts.has_duplicate_max,  # t.f
        ts.has_duplicate_min,  # t.f
        ts.kurtosis,
        ts.longest_strike_above_mean,
        ts.longest_strike_below_mean,
    )
    return tuple(calculate(signal) for calculate in calculators)
def time_series_kurtosis(x):
    """
    Returns the kurtosis of x (calculated with the adjusted Fisher-Pearson
    standardized moment coefficient G2).

    Thin wrapper: the computation is delegated to
    ``ts_feature_calculators.kurtosis``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :rtype: float
    """
    return ts_feature_calculators.kurtosis(x)
def extract_feats(ts):
    """Return nine summary features of a series as a NumPy array.

    :param ts: the series handed to every ``fc`` calculator.
    :return: array ``[standard deviation, kurtosis, skewness, count above
        mean, count below mean, longest strike above mean, longest strike
        below mean, Fourier entropy (1000000 bins), absolute energy]``.
    """
    features = [
        fc.standard_deviation(ts),
        fc.kurtosis(ts),
        fc.skewness(ts),
        fc.count_above_mean(ts),
        fc.count_below_mean(ts),
        fc.longest_strike_above_mean(ts),
        fc.longest_strike_below_mean(ts),
        fc.fourier_entropy(ts, bins=1000000),
        fc.abs_energy(ts),
    ]
    return np.array(features)
def transform(self, value):
    """Turn a series into a fixed list of six summary statistics.

    :param value: sequence of numeric samples, or ``None``.
    :return: ``[abs_energy, kurtosis, mean_abs_change, skewness,
        fraction of samples above the mean, fraction below the mean]``,
        or ``None`` when ``value`` is ``None`` or any calculator fails
        (for example a division by zero on an empty sequence).
    """
    if value is None:
        return None

    # TODO: remove try-except and validate value in order to avoid exception
    try:
        return [
            abs_energy(value),
            kurtosis(value),
            mean_abs_change(value),
            skewness(value),
            count_above_mean(value) / len(value),
            count_below_mean(value) / len(value),
        ]
    except Exception:
        # Deliberately best-effort, but narrower than a bare ``except`` so
        # KeyboardInterrupt and SystemExit still propagate.
        return None
def _read_total_forces(path):
    """Read the two total-force columns of a whitespace-separated recording.

    :param path: path of the text file to read.
    :return: ``(left, right)`` lists of floats taken from fields 17 and 18
        (zero-based) of every non-blank line.
    :raises IndexError: if a non-blank line has fewer than 19 fields.
    :raises ValueError: if a force field is not a number.
    """
    left, right = [], []
    # The context manager closes the file even when a line fails to parse.
    with open(path, 'r') as recording:
        for line in recording:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines such as a trailing newline
            left.append(float(fields[17]))
            right.append(float(fields[18]))
    return left, right


def main():
    """Summarise every ``*.txt`` recording of the working directory.

    One row per recording is written to the ``SummaryPatients`` sheet of
    ``Summary.xlsx`` (which must already exist in the working directory),
    starting at row 2:

    * column A: running file number (1-based);
    * column B: 1 if the file name contains ``'Pt'``, else 0;
    * columns C-N: absolute energy, kurtosis, skewness, median, mean and
      variance, each for the left then the right total force;
    * columns O-R / S-V: value part of the FFT aggregates (centroid,
      variance, skew, kurtosis) for the left / right total force.

    The workbook is saved in place after all files are processed.
    """
    dirname = os.path.realpath('.')
    excelF = os.path.join(dirname, 'Summary.xlsx')
    myworkbook = openpyxl.load_workbook(excelF)
    worksheet = myworkbook['SummaryPatients']

    # (left column, right column, calculator)
    scalar_columns = (
        ('C', 'D', tf.abs_energy),
        ('E', 'F', tf.kurtosis),
        ('G', 'H', tf.skewness),
        ('I', 'J', tf.median),
        ('K', 'L', tf.mean),
        ('M', 'N', tf.variance),
    )
    fft_params = [
        {"aggtype": "centroid"},
        {"aggtype": "variance"},
        {"aggtype": "skew"},
        {"aggtype": "kurtosis"},
    ]

    recordings = glob.glob(os.path.join(dirname, '*.txt'))
    for file_number, filename in enumerate(recordings, start=1):
        row = str(file_number + 1)  # row 1 is left for the header
        force_left, force_right = _read_total_forces(filename)

        worksheet['A' + row] = file_number
        # Only the file name is inspected so that a 'Pt' somewhere in the
        # directory path cannot mislabel every recording.
        worksheet['B' + row] = 1 if 'Pt' in os.path.basename(filename) else 0

        for left_column, right_column, calculate in scalar_columns:
            worksheet[left_column + row] = calculate(force_left)
            worksheet[right_column + row] = calculate(force_right)

        # fft_aggregated yields one item per requested aggregate, in
        # request order; element 1 of each item is written to the sheet.
        for column, item in zip('OPQR',
                                tf.fft_aggregated(force_left, fft_params)):
            worksheet[column + row] = item[1]
        for column, item in zip('STUV',
                                tf.fft_aggregated(force_right, fft_params)):
            worksheet[column + row] = item[1]

    myworkbook.save(excelF)
def log_kurtosis(x):
    """Return the natural logarithm of ``fc.kurtosis(x)``.

    NOTE(review): ``np.log`` yields ``nan`` for negative input and ``-inf``
    for zero; if ``fc.kurtosis`` reports excess kurtosis it can be
    negative -- confirm that callers handle non-finite results.

    :param x: the series handed to ``fc.kurtosis``.
    :return: ``np.log`` of the kurtosis value.
    """
    return np.log(fc.kurtosis(x))
def kurtosis(x):
    """Return the kurtosis of ``x``; thin wrapper around ``fc.kurtosis``.

    :param x: the series handed to ``fc.kurtosis``.
    :return: the value returned by ``fc.kurtosis``.
    """
    return fc.kurtosis(x)
def kurtosis(current_observation: pd.DataFrame, raw_key: str):
    """Return the kurtosis of one column of the current observation.

    :param current_observation: frame holding the observed signals.
    :param raw_key: label of the column to analyse.
    :return: the value ``tsf.kurtosis`` returns for that column
        (presumably a float -- confirm against the ``tsf`` module).
    :raises KeyError: if ``raw_key`` is not a column of the frame.
    """
    return tsf.kurtosis(current_observation[raw_key])
# %% fft_coef = [] index_ar = [] for i in range(len(meal_data_frame)): (d, c) = pywt.dwt(meal_data_frame.iloc[i, :], 'db2') (d, c) = pywt.dwt(d, 'db2') fft_coef.append(c) index_ar.append(i) # %% # Feature: Kurtosis MealKurtosis = [] for i in range(meal_data_len): row = meal_data_frame.iloc[i, :] kurtVal = feature_calculators.kurtosis(row) MealKurtosis.append(kurtVal) # %% Meal_Amp = np.zeros(meal_data_len) for i in range(meal_data_len): Meal_Amp[i] = (np.max(meal_data_frame.iloc[i, :]) - np.min(meal_data_frame.iloc[i, :])) # %% Meal_diff = [] for i in range(meal_data_len): dayCOGA = [] for j in range(7, 24): if not pd.isnull(meal_data_frame.iloc[i][j]) and not pd.isnull( meal_data_frame.iloc[i][j - 7]): dayCOGA.append(meal_data_frame.iloc[i][j] - meal_data_frame.iloc[i][j - 7])
def _top_fft_magnitudes(row, count=3):
    """Return the ``count`` largest FFT magnitudes of ``row``, largest first."""
    magnitudes = np.sort(abs(fft(row)))
    return magnitudes[::-1][:count]


def _mean_abs_step(values):
    """Return the mean absolute difference of consecutive ``values``, rounded to 2 places."""
    steps = [abs(curr - prev) for prev, curr in zip(values, values[1:])]
    return np.round(np.mean(steps), 2)


def _cgm_feature_frame(readings):
    """Build the feature frame for one set of glucose windows (one row each).

    :param readings: DataFrame with a default integer index; columns are
        the successive readings of a window.
    :return: DataFrame with columns ``diffTime, diffGlucose, FFT1, FFT2,
        FFT3, CGMVelocity, maxTime, Skewness, Kurt`` in that order.
    """
    features = pd.DataFrame(index=readings.index)

    # Distance between the positions of the maximum and the minimum.
    features['diffTime'] = readings.apply(
        lambda row: abs(row.idxmax() - row.idxmin()), axis=1)
    # Range of the readings.
    features['diffGlucose'] = readings.apply(
        lambda row: row.max() - row.min(), axis=1)

    # Three largest FFT magnitudes. Built row by row instead of through
    # DataFrame.apply so the 3-element results are never broadcast back
    # onto the input columns.
    top_fft = [_top_fft_magnitudes(row) for _, row in readings.iterrows()]
    fft_frame = pd.DataFrame(top_fft, columns=['FFT1', 'FFT2', 'FFT3'],
                             index=readings.index)
    features = features.join(fft_frame)

    features['CGMVelocity'] = np.asarray(
        [_mean_abs_step(row.tolist()) for _, row in readings.iterrows()],
        dtype=float)
    features['maxTime'] = readings.apply(lambda row: row.idxmax(), axis=1)

    # Whole-column assignment replaces the former chained
    # ``frame['col'][i] = value`` writes, which do nothing under pandas
    # Copy-on-Write and would leave these columns filled with NaN.
    features['Skewness'] = readings.apply(ts.skewness, axis=1)
    # The sample-entropy feature (sixth) is currently disabled.
    features['Kurt'] = readings.apply(
        lambda row: ts.kurtosis(np.array(row)), axis=1)
    return features


def createFeature(np_meal_data, np_no_meal_data):
    """Create the feature frames for meal and no-meal glucose windows.

    Both inputs go through the same pipeline, so the two returned frames
    share one column layout.

    :param np_meal_data: 2-D array-like, one meal window per row.
    :param np_no_meal_data: 2-D array-like, one no-meal window per row.
    :return: ``(feature_meal, feature_no_meal)`` DataFrames with columns
        ``diffTime, diffGlucose, FFT1, FFT2, FFT3, CGMVelocity, maxTime,
        Skewness, Kurt``.
    """
    feature_meal = _cgm_feature_frame(pd.DataFrame(np_meal_data))
    feature_no_meal = _cgm_feature_frame(pd.DataFrame(np_no_meal_data))
    return feature_meal, feature_no_meal
def _read_total_forces(path):
    """Read the two total-force columns of a whitespace-separated recording.

    :param path: path of the text file to read.
    :return: ``(left, right)`` lists of floats taken from fields 17 and 18
        (zero-based) of every non-blank line.
    :raises IndexError: if a non-blank line has fewer than 19 fields.
    :raises ValueError: if a force field is not a number.
    """
    left, right = [], []
    # The context manager closes the file even when a line fails to parse.
    with open(path, 'r') as recording:
        for line in recording:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines such as a trailing newline
            left.append(float(fields[17]))
            right.append(float(fields[18]))
    return left, right


def main():
    """Extract summary features from ``GaPt07_01.txt`` in the working directory.

    :return: dict of features. Keys are ``absEnergy``, ``kurtosis``,
        ``skewness``, ``median``, ``mean`` and ``variance`` suffixed with
        ``L``/``R`` (left/right total force), plus ``fftCentroid``,
        ``fftVariance``, ``fftSkew`` and ``fftKurtosis`` suffixed likewise;
        the FFT entries hold the complete items yielded by
        ``tf.fft_aggregated``. Earlier versions computed this dict and
        discarded it; it is returned so the result is usable.
    """
    dirname = os.path.realpath('.')
    filename = os.path.join(dirname, 'GaPt07_01.txt')
    force_left, force_right = _read_total_forces(filename)

    extracted_features = {}
    scalar_calculators = (
        ("absEnergy", tf.abs_energy),
        ("kurtosis", tf.kurtosis),
        ("skewness", tf.skewness),
        ("median", tf.median),
        ("mean", tf.mean),
        ("variance", tf.variance),
    )
    for label, calculate in scalar_calculators:
        extracted_features[label + "L"] = calculate(force_left)
        extracted_features[label + "R"] = calculate(force_right)

    fft_params = [
        {"aggtype": "centroid"},
        {"aggtype": "variance"},
        {"aggtype": "skew"},
        {"aggtype": "kurtosis"},
    ]
    fft_labels = ("fftCentroid", "fftVariance", "fftSkew", "fftKurtosis")
    # fft_aggregated yields one item per requested aggregate, in request
    # order, so the labels line up positionally.
    for side, forces in (("L", force_left), ("R", force_right)):
        for label, item in zip(fft_labels,
                               tf.fft_aggregated(forces, fft_params)):
            extracted_features[label + side] = item

    return extracted_features
def agg_kurtosis(x):
    """Return the kurtosis of ``x``; thin wrapper around
    ``feature_calculator.kurtosis``.

    :param x: the series handed to ``feature_calculator.kurtosis``.
    :return: the value returned by ``feature_calculator.kurtosis``.
    """
    return feature_calculator.kurtosis(x)
def get_kurtosis(arr):
    """Return the kurtosis of ``arr`` as a one-element array of finite values.

    :param arr: the samples handed to ``kurtosis``.
    :return: array of shape ``(1,)``; ``np.nan_to_num`` replaces NaN with
        0.0 and infinities with large finite numbers.
    """
    raw_value = kurtosis(arr)
    return np.nan_to_num(np.array([raw_value]))
# NOTE: every per-row write below uses ``.loc[row, column]``. The earlier
# chained form ``feat_dataset[column][row] = value`` is a silent no-op under
# pandas Copy-on-Write and would leave the columns filled with NaN.

##Skewness
feat_dataset['CGM_Skewness'] = np.nan
for i in range(len(dataset)):
    feat_dataset.loc[i, 'CGM_Skewness'] = ts.skewness(dataset.loc[i, :])

#CGM_Displacement
# Sum of absolute differences between consecutive readings, rounded to 2 places.
feat_dataset['CGM_Displacement'] = np.nan
for i in range(len(dataset)):
    c_list = dataset.loc[i, :].tolist()
    steps = [abs(curr - prev) for prev, curr in zip(c_list, c_list[1:])]
    feat_dataset.loc[i, 'CGM_Displacement'] = np.round(np.sum(steps), 2)

#CGM_Kurtosis
feat_dataset['CGM_Kurtosis'] = np.nan
for i in range(len(dataset)):
    feat_dataset.loc[i, 'CGM_Kurtosis'] = ts.kurtosis(
        np.array(dataset.iloc[i, :]))

#Recurr
feat_dataset['CGM_Recur'] = np.nan
for i in range(len(dataset)):
    feat_dataset.loc[i, 'CGM_Recur'] = (
        ts.ratio_value_number_to_time_series_length(
            np.array(dataset.iloc[i, :])))

#Remove calculated columns
del feat_dataset['CGM_Max']
del feat_dataset['CGM_Min']

# Fix the final column order.
feat_dataset = feat_dataset[[
    'CGM_Entropy', 'CGM_RMS', 'CGM_Correlation', 'CGM_Peaks', 'CGM_Velocity',
    'CGM_MinMax', 'CGM_Skewness', 'CGM_Displacement', 'CGM_Kurtosis',
    'CGM_Recur'
]]
def _abs_steps(values):
    """Return the absolute differences between consecutive entries of ``values``."""
    return [abs(curr - prev) for prev, curr in zip(values, values[1:])]


def feat_extraction(dataset):
    """Extract ten per-row features from a frame of glucose windows.

    Rows are read positionally, so the result always has a 0..n-1 index.
    Columns are assigned whole: the earlier chained writes
    (``frame['col'][i] = value``) are a silent no-op under pandas
    Copy-on-Write and would leave every feature as NaN.

    :param dataset: DataFrame with one window per row and numeric columns.
    :return: DataFrame with float columns ``CGM_Entropy, CGM_RMS,
        CGM_Correlation, CGM_Peaks, CGM_Velocity, CGM_MinMax, CGM_Skewness,
        CGM_Displacement, CGM_Kurtosis, CGM_Recur`` in that order.
    """
    n_rows = len(dataset)
    row_series = [dataset.iloc[i, :] for i in range(n_rows)]
    row_arrays = [np.array(row) for row in row_series]
    # Consecutive differences feed both the velocity and the displacement.
    row_steps = [_abs_steps(row.tolist()) for row in row_series]

    def float_column(values):
        # Keep every feature a float column, as the NaN-initialised
        # columns of the earlier implementation were.
        return np.asarray(values, dtype=float)

    feat_dataset = pd.DataFrame(index=np.arange(n_rows))

    ##ENTROPY
    feat_dataset['CGM_Entropy'] = float_column(
        [ts.sample_entropy(values) for values in row_arrays])

    ##RMS
    # Computed on the Series so missing values are skipped by the mean.
    feat_dataset['CGM_RMS'] = float_column(
        [np.sqrt(np.mean(row ** 2)) for row in row_series])

    #Correlation (lag 1)
    feat_dataset['CGM_Correlation'] = float_column(
        [ts.autocorrelation(values, 1) for values in row_arrays])

    ##Number_of_Peaks (support 2)
    feat_dataset['CGM_Peaks'] = float_column(
        [ts.number_peaks(values, 2) for values in row_arrays])

    #CGM Velocity: mean absolute step, rounded to 2 places
    feat_dataset['CGM_Velocity'] = float_column(
        [np.round(np.mean(steps), 2) for steps in row_steps])

    #MinMax: range of each row
    feat_dataset['CGM_MinMax'] = (
        dataset.max(axis=1) - dataset.min(axis=1)).to_numpy()

    ##Skewness
    feat_dataset['CGM_Skewness'] = float_column(
        [ts.skewness(row) for row in row_series])

    #CGM_Displacement: total absolute step, rounded to 2 places
    feat_dataset['CGM_Displacement'] = float_column(
        [np.round(np.sum(steps), 2) for steps in row_steps])

    #CGM_Kurtosis
    feat_dataset['CGM_Kurtosis'] = float_column(
        [ts.kurtosis(values) for values in row_arrays])

    #Recurr
    feat_dataset['CGM_Recur'] = float_column(
        [ts.ratio_value_number_to_time_series_length(values)
         for values in row_arrays])

    return feat_dataset
# NOTE(review): ``fp_m`` is opened outside the visible part of this script;
# this load presumably sits inside that ``with`` block -- confirm.
# NOTE(security): pickle.load can execute arbitrary code stored in the
# file; only load recordings from a trusted source.
memory_data = pickle.load(fp_m)
with open(
        '/Users/jiayun/PycharmProjects/D'
        'ecision making projext/partner/Analysis_IEEG/ZHAO/e'
        'eg_p_filtered.pkl', 'rb') as fp_p:
    perception_data = pickle.load(fp_p)

# Kurtosis of every normalised signal in memory_data, rounded to 3 places;
# k_m[i, j] belongs to memory_data[i][j, :].
# NOTE(review): ``normalize`` is defined elsewhere; what ``range=3`` and
# ``offset=None`` mean cannot be told from here.
kurto_matrix = []
for i in range(memory_data.shape[0]):
    l1 = memory_data[i]
    k_l = []
    for j in range(l1.shape[0]):
        signal = l1[j, :]
        signal = normalize(signal, range=3, offset=None)
        kurto_j = ts.kurtosis(signal)
        kurto_j = round(kurto_j, 3)
        k_l.append(kurto_j)
    k_j = np.asarray(k_l)
    kurto_matrix.append(k_j)
k_m = np.asarray(kurto_matrix)
# Entries with kurtosis >= 15 are reported; row 0 of the index array holds
# the first-axis indices and row 1 the second-axis indices, which the
# message labels as lead and trial.
unavaliable_experments_m = np.where(k_m >= 15)
unavaliable_experments_m = np.asanyarray(unavaliable_experments_m)
print("Unavaliable memory experiments dete"
      "cted, '[lead], [trail]':", unavaliable_experments_m[0],
      unavaliable_experments_m[1])

# Same screening for the perception recordings (continues past this view).
kurto_matrix = []
for i in range(perception_data.shape[0]):
    l1 = perception_data[i]
def kurtosis(self):
    """Return the kurtosis of every row of ``self.data`` as a 1-D array.

    :return: array with one ``tcal.kurtosis`` value per first-axis entry
        of ``self.data``, in order.
    """
    data = self.data
    per_row = (tcal.kurtosis(data[idx, :]) for idx in range(len(data)))
    return np.array(list(per_row))