Example #1
# Assumed import for this snippet (not shown in the original): the tsfresh feature calculators module.
from tsfresh.feature_extraction import feature_calculators as ts_feature_calculators


def time_series_sum_of_reoccurring_data_points(x):
    """
    Returns the sum of all data points that are present in the time series
    more than once.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    return ts_feature_calculators.sum_of_reoccurring_data_points(x)
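A minimal usage sketch of the wrapper above, with made-up data: 1.0 and 3.0 each occur twice, so the feature value is 1.0*2 + 3.0*2 = 8.0.

import pandas as pd

series = pd.Series([1.0, 3.0, 3.0, 1.0, 2.0])
print(time_series_sum_of_reoccurring_data_points(series))  # 8.0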
# `ts` is assumed to be the tsfresh feature calculators module (not imported in the original snippet):
from tsfresh.feature_extraction import feature_calculators as ts


def TS_features12(signal):
    stand_deviation = ts.standard_deviation(signal)
    sum_reoccurring = ts.sum_of_reoccurring_data_points(signal)
    sum_r_value = ts.sum_of_reoccurring_values(signal)
    sum_v = ts.sum_values(signal)
    variance = ts.variance(signal)
    variance_larger_than_sd = ts.variance_larger_than_standard_deviation(
        signal)
    return stand_deviation, sum_reoccurring, sum_r_value, sum_v, variance, variance_larger_than_sd
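A short usage sketch on a synthetic signal; a plain numpy array is used here, which these tsfresh calculators accept directly:

import numpy as np

signal = np.array([1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])
std, sum_reoccurring, sum_reoccurring_vals, total, var, var_gt_std = TS_features12(signal)
print(std, sum_reoccurring, sum_reoccurring_vals, total, var, var_gt_std)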
Example #3
def time_series_sum_of_reoccurring_data_points(x):
    """
    Returns the sum of all data points that are present in the time series
    more than once.
    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    return ts_feature_calculators.sum_of_reoccurring_data_points(x)
# Assumed imports for this snippet (the original file's import block is not
# shown; fft is assumed to come from scipy.fftpack, numpy.fft.fft would work
# the same way):
import numpy as np
import pandas as pd
from numpy.polynomial import polynomial as poly
from scipy.fftpack import fft
from sklearn.preprocessing import StandardScaler
from tsfresh.feature_extraction import feature_calculators as fc


def feature_extraction(cgm_glucose):

    n = len(cgm_glucose[0])
    chunk_size = n // 4  # computed but not used below

    rows = []

    for row in cgm_glucose:
        val = []

        # Feature set 1: Windowed mean
        for i in range(0, 30, 6):
            val.append(fc.mean(row[i:i + 6]))

        # Feature set 2: Sum of reoccurring data points
        for i in range(0, 30, 6):
            val.append(fc.sum_of_reoccurring_data_points(row[i:i + 6]))

        # Feature set 3: first 5 FFT coefficients (real parts, DC component excluded)
        fft_coefficients = fft(row, n=6)[1:]
        fft_coefficients_real = [value.real for value in fft_coefficients]
        val += fft_coefficients_real
        # val.extend(fft_coefficients_real)

        # Feature set 4: Polyfit
        x = np.linspace(0, 1, len(row))
        y = row
        val.extend(poly.polyfit(x, y, 3)[:-1])

        rows.append(val)

    feature_matrix = pd.DataFrame(StandardScaler().fit_transform(rows))
    # Project the features into the PCA space saved during training

    pca_components = pd.read_csv("pca_components.csv", header=None)
    transformedData = np.dot(feature_matrix, np.transpose(pca_components))

    # Run all 4 classification algorithms on the test data

    classify_test_data(transformedData)
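This test-time routine assumes that pca_components.csv was already written by the training-time feature_extraction() shown next, and that classify_test_data is defined elsewhere in the project. A minimal invocation sketch, with a hypothetical input file name:

test_rows = pd.read_csv("TestData.csv", header=None).values  # hypothetical file name
feature_extraction(test_rows)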
# Same assumed imports as in the previous snippet (np, pd, poly, fft, fc,
# StandardScaler), plus PCA from scikit-learn:
from sklearn.decomposition import PCA


def feature_extraction():
    cgm_glucose_levels_no_meal = pd.read_csv("preprocessed_Nomeal.csv")
    cgm_glucose_levels_meal = pd.read_csv("preprocessed_mealData.csv")

    cgm_glucose = np.concatenate(
        (cgm_glucose_levels_meal.values, cgm_glucose_levels_no_meal.values),
        axis=0)

    n = len(cgm_glucose[0])
    chunk_size = n // 4  # computed but not used below

    rows = []

    for row in cgm_glucose:
        val = []

        # Feature set 1: Windowed mean
        for i in range(0, 30, 6):
            val.append(fc.mean(row[i:i + 6]))

        # Feature set 2: Windowed sum of reoccurring data points
        # (an earlier windowed-variance variant is kept below for reference)
        # for i in range(0, 30, 6):
        #     val.append(fc.variance(row[i:i + 6]))
        for i in range(0, 30, 6):
            val.append(fc.sum_of_reoccurring_data_points(row[i:i + 6]))

        # Feature set 3: first 5 FFT coefficients (real parts, DC component excluded)
        fft_coefficients = fft(row, n=6)[1:]
        fft_coefficients_real = [value.real for value in fft_coefficients]
        val += fft_coefficients_real

        #val.append(np.sqrt(np.mean(row[24:]**2)))

        # Feature set 4: Polyfit

        x = np.linspace(0, 1, len(row))
        y = row
        val.extend(poly.polyfit(x, y, 3)[:-1])

        rows.append(val)

        # for i in range(0, 30, 6):
        #     val.append(fc.change_quantiles(row[i:i + 6],))

    feature_matrix = pd.DataFrame(StandardScaler().fit_transform(rows))
    labels = [1] * len(cgm_glucose_levels_meal)
    label_no = [0] * len(cgm_glucose_levels_no_meal)
    labels.extend(label_no)
    labels = np.array(labels)

    feature_matrix_meal = feature_matrix.iloc[
        :len(cgm_glucose_levels_meal), :].values
    feature_matrix_no_meal = feature_matrix.iloc[
        len(cgm_glucose_levels_meal):, :].values

    pca = PCA()
    pca.fit(feature_matrix_meal)
    # print("PCA explained variance: ")
    # print(pca.explained_variance_ratio_)

    # Save the top-5 PCA components (the new feature space) for use at test time
    pd.DataFrame(pca.components_[:5]).to_csv("pca_components.csv",
                                             header=None,
                                             index=None)

    transformedData = np.dot(feature_matrix_meal,
                             np.transpose(pca.components_[:5]))
    transformedData_no_meal = np.dot(feature_matrix_no_meal,
                                     np.transpose(pca.components_[:5]))

    transformedData = np.concatenate(
        (transformedData, transformedData_no_meal))

    return transformedData, labels
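A hedged sketch of how the returned feature matrix and labels could be consumed; the project's own four classification algorithms are not shown in this snippet, so a scikit-learn SVM with cross-validation stands in purely for illustration:

from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

features, labels = feature_extraction()
scores = cross_val_score(SVC(), features, labels, cv=5)  # illustrative classifier, not the project's
print("Mean CV accuracy:", scores.mean())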