예제 #1
0
def extract_features_with_param(time_series, window):
    """
    Compute the parameterized statistical features of a time series.

    The series is split with ``tsd_common.split_time_series``; the max-min
    normalized split is passed to
    ``statistical_features.get_parameters_features`` and that result is
    returned unchanged. No input validation is performed here.

    :param time_series: the time series to extract the features of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value returned by ``get_parameters_features``
    :return type: list with float (as documented by the original author)
    """
    parts = tsd_common.split_time_series(time_series, window)

    # The plain normalization is still evaluated, exactly as before, although
    # its result does not contribute to the returned features.
    # NOTE(review): confirm it has no required side effect before removing it.
    tsd_common.normalize_time_series(parts)

    max_min_parts = tsd_common.normalize_time_series_by_max_min(parts)
    return statistical_features.get_parameters_features(max_min_parts)
예제 #2
0
def extract_features(time_series, window):
    """
    Build the combined feature vector of a time series.

    The result is the concatenation, in this order, of the statistical,
    fitting and classification features.

    :param time_series: the time series to extract the features of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the combined features, or an empty list when
        ``tsd_common.is_standard_time_series`` rejects the input
    :return type: list with float
    """
    # Guard clause: a non-standard series yields no features at all.
    if not tsd_common.is_standard_time_series(time_series, window):
        return []

    parts = tsd_common.split_time_series(time_series, window)
    normalized_parts = tsd_common.normalize_time_series(parts)
    max_min_parts = tsd_common.normalize_time_series_by_max_min(parts)

    # Statistical features only look at the element at index 4 of the
    # normalized split; the other two groups receive a whole split.
    return (
        statistical_features.get_statistical_features(normalized_parts[4])
        + fitting_features.get_fitting_features(normalized_parts)
        + classification_features.get_classification_features(max_min_parts)
    )
예제 #3
0
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Count, per part of the split series, the elements below a threshold.

    The series is split with ``split_time_series(x, DEFAULT_WINDOW)``. For
    each part the number of elements smaller than 0.01 is counted, and every
    count is divided by ``DEFAULT_WINDOW + 1``.

    :param x: normalized time series
    :type x: pandas.Series
    :return: one ratio per part; five integer zeros when no part contains
        any element below the threshold
    :return type: list
    """
    cutoff = 0.01

    def _below_cutoff(part):
        # Mark every element under the cutoff with -1, then count the marks.
        marked = np.array(part)
        marked[marked < cutoff] = -1
        return (marked == -1).sum()

    per_part = [
        _below_cutoff(part) for part in split_time_series(x, DEFAULT_WINDOW)
    ]

    if sum(per_part) != 0:
        return list(np.array(per_part) / float(DEFAULT_WINDOW + 1))
    return [0, 0, 0, 0, 0]
예제 #4
0
def extract_features(time_series, window):
    """
    Extract statistical, fitting and classification features.

    :param time_series: the time series to extract the features of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: statistical + fitting + classification features concatenated
        in that order; an empty list if the series is not accepted by
        ``tsd_common.is_standard_time_series``
    :return type: list with float
    """
    is_standard = tsd_common.is_standard_time_series(time_series, window)
    if is_standard:
        pieces = tsd_common.split_time_series(time_series, window)
        # Two normalizations of the same split feed different extractors.
        normalized = tsd_common.normalize_time_series(pieces)
        max_min_normalized = tsd_common.normalize_time_series_by_max_min(
            pieces)

        # The statistical extractor receives only element 4 of the split.
        statistical = statistical_features.get_statistical_features(
            normalized[4])
        fitting = fitting_features.get_fitting_features(normalized)
        classification = (
            classification_features.get_classification_features(
                max_min_normalized))
        result = statistical + fitting + classification
    else:
        # Place to report the rejected input, if desired.
        result = []
    return result
예제 #5
0
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Ratio of below-threshold elements for each part of the split series.

    Each part returned by ``split_time_series(x, DEFAULT_WINDOW)`` is
    scanned for elements smaller than 0.01; the resulting counts are divided
    by ``DEFAULT_WINDOW + 1``.

    :param x: normalized time series
    :type x: pandas.Series
    :return: one value per part, or ``[0, 0, 0, 0, 0]`` when nothing falls
        below the threshold
    :return type: list
    """
    limit = 0.01
    parts = split_time_series(x, DEFAULT_WINDOW)

    hits = []
    for part in parts:
        flagged = np.array(part)
        # -1 is used as a marker for "below the limit" and counted afterwards.
        flagged[flagged < limit] = -1
        hits.append((flagged == -1).sum())

    nothing_below = sum(hits) == 0
    if nothing_below:
        return [0, 0, 0, 0, 0]

    denominator = float(DEFAULT_WINDOW + 1)
    return list(np.array(hits) / denominator)
예제 #6
0
def calculate_all_features(time_series, window):
    """
    Compute the three ``feature_calculate`` feature groups of a time series.

    The returned value is the concatenation ``stat + anomaly + pattern``.
    The anomaly group is computed from the normalized split, while the
    pattern and stat groups are computed from the max-min normalized split.

    :param time_series: the time series to extract the features of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    parts = tsd_common.split_time_series(time_series, window)
    normalized_parts = tsd_common.normalize_time_series(parts)
    max_min_parts = tsd_common.normalize_time_series_by_max_min(parts)

    # An earlier variant (kept only as commented-out code before) combined
    # statistical, classification, fitting and parameter features instead.

    # The groups are evaluated in the order anomaly, pattern, stat and then
    # concatenated in a different order below.
    anomaly_group = feature_calculate.get_classification_features_test(
        normalized_parts)
    pattern_group = feature_calculate.get_classification_feature_pattern(
        max_min_parts)
    stat_group = feature_calculate.get_classification_feature_stat(
        max_min_parts)

    return stat_group + anomaly_group + pattern_group
예제 #7
0
def time_series_daily_parts_value_distribution_with_threshold(x):
    """
    Distribution of below-threshold elements over three segments c, b, a.

    The split from ``split_time_series(x, DEFAULT_WINDOW)`` is regrouped
    into three segments: c (part 0 plus part 1 without its first element),
    b (part 2 plus part 3 without its first element) and a (part 4).
    Elements smaller than 0.01 are counted in each segment.

    :param x: normalized time series
    :type x: pandas.Series
    :return: 6 values: first each segment's share of all below-threshold
        elements (three zeros when there are none), then each segment's
        below-threshold count divided by the segment length
    :return type: list
    """
    threshold = 0.01
    parts = split_time_series(x, DEFAULT_WINDOW)
    segments = [
        parts[0] + parts[1][1:],
        parts[2] + parts[3][1:],
        parts[4],
    ]

    def _count_below(segment):
        # Flag elements under the threshold with -1 and count the flags.
        flagged = np.array(segment)
        flagged[flagged < threshold] = -1
        return (flagged == -1).sum()

    below = [_count_below(segment) for segment in segments]
    total_below = below[0] + below[1] + below[2]

    if total_below == 0:
        features = [0, 0, 0]
    else:
        features = [count / float(total_below) for count in below]

    # Per-segment ratio relative to the segment's own length.
    features.extend(
        count / float(len(segment))
        for count, segment in zip(below, segments)
    )
    return features
예제 #8
0
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Per-part ratio of elements of the split series that are below 0.01.

    The count of below-threshold elements of every part produced by
    ``split_time_series(x, DEFAULT_WINDOW)`` is divided by
    ``DEFAULT_WINDOW + 1``.

    :param x: normalized time series
    :type x: pandas.Series
    :return: one value per part; a list of five integer zeros if no element
        of any part is below the threshold
    :return type: list
    """
    threshold = 0.01

    def _mark_and_count(values):
        # Elements below the threshold are overwritten with -1 and counted.
        marks = np.array(values)
        marks[marks < threshold] = -1
        return (marks == -1).sum()

    counts = list(map(_mark_and_count, split_time_series(x, DEFAULT_WINDOW)))

    if sum(counts) == 0:
        return [0, 0, 0, 0, 0]
    return list(np.array(counts) / float(DEFAULT_WINDOW + 1))


#
# def get_classification_features(x):
#     """
#     :param x: split time series normalized by maximum and minimum value
#     :return: list of some local anomaly features and morphological features
#     """
#     classification_features =[
#
#         {"time_series_autocorrelation_classification":time_series_autocorrelation(x)},
#         {"time_series_coefficient_of_variation_classification":time_series_coefficient_of_variation(x)},
#     ]
#     classification_features.extend(time_series_value_distribution(x))
#     # classification_features.extend(time_series_daily_parts_value_distribution(x))
#     # classification_features.extend(time_series_daily_parts_value_distribution_with_threshold(x))
#     # classification_features.extend(time_series_window_parts_value_distribution_with_threshold(x))
#     # classification_features.extend(time_series_binned_entropy(x))
#     # add yourself classification features here...
#
# return classification_features
예제 #9
0
def time_series_daily_parts_value_distribution(x):
    """
    Bucketed value distribution of three segments of the time series.

    The split from ``split_time_series(x, DEFAULT_WINDOW)`` is regrouped
    into segments c (part 0 plus part 1 without its first element), b (part
    2 plus part 3 without its first element) and a (part 4). Each segment is
    histogrammed over fixed bucket edges and the counts are divided by the
    segment length.

    :param x: normalized time series
    :type x: pandas.Series
    :return: the bucket ratios of c, then b, then a
    :return type: list
    """
    # The final edge 1.0 is repeated, which creates a last bucket [1.0, 1.0].
    bucket_edges = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
                    0.9, 0.99, 1.0, 1.0]
    parts = split_time_series(x, DEFAULT_WINDOW)
    segments = (
        parts[0] + parts[1][1:],
        parts[2] + parts[3][1:],
        parts[4],
    )

    # All histograms are computed before any ratio, as in the original order.
    histograms = [
        np.histogram(segment, bins=bucket_edges)[0] for segment in segments
    ]

    ratios = []
    for segment, counts in zip(segments, histograms):
        ratios.extend(counts / float(len(segment)))
    return ratios
예제 #10
0
def time_series_daily_parts_value_distribution(x):
    """
    Share of elements per bucket for the segments c, b and a.

    Segment c is part 0 joined with part 1 minus its first element, segment
    b is part 2 joined with part 3 minus its first element, and segment a is
    part 4 of ``split_time_series(x, DEFAULT_WINDOW)``.

    :param x: normalized time series
    :type x: pandas.Series
    :return: per-bucket counts divided by the segment length, for c, b and
        a, concatenated in that order
    :return type: list
    """
    # Note the duplicated trailing 1.0 edge: it adds a final [1.0, 1.0] bucket.
    edges = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
             0.99, 1.0, 1.0]
    pieces = split_time_series(x, DEFAULT_WINDOW)
    segment_c = pieces[0] + pieces[1][1:]
    segment_b = pieces[2] + pieces[3][1:]
    segment_a = pieces[4]

    hist_c, _ = np.histogram(segment_c, bins=edges)
    hist_b, _ = np.histogram(segment_b, bins=edges)
    hist_a, _ = np.histogram(segment_a, bins=edges)

    share_c = list(hist_c / float(len(segment_c)))
    share_b = list(hist_b / float(len(segment_b)))
    share_a = list(hist_a / float(len(segment_a)))
    return share_c + share_b + share_a
예제 #11
0
def time_series_daily_parts_value_distribution_with_threshold(x):
    """
    Below-threshold statistics for the three segments c, b and a.

    Segment c is part 0 joined with part 1 minus its first element, segment
    b is part 2 joined with part 3 minus its first element, and segment a is
    part 4 of ``split_time_series(x, DEFAULT_WINDOW)``. An element counts as
    "below threshold" when it is smaller than 0.01.

    :param x: normalized time series
    :type x: pandas.Series
    :return: 6 values: the share of all below-threshold elements located in
        c, b and a (``0, 0, 0`` when there are none), followed by the
        below-threshold count of c, b and a divided by each segment's length
    :return type: list
    """
    threshold = 0.01
    pieces = split_time_series(x, DEFAULT_WINDOW)
    segment_c = pieces[0] + pieces[1][1:]
    segment_b = pieces[2] + pieces[3][1:]
    segment_a = pieces[4]

    def _flag_count(segment):
        # Overwrite below-threshold elements with -1, then count the -1s.
        flags = np.array(segment)
        flags[flags < threshold] = -1
        return (flags == -1).sum()

    below_c = _flag_count(segment_c)
    below_b = _flag_count(segment_b)
    below_a = _flag_count(segment_a)
    below_total = below_c + below_b + below_a

    if below_total != 0:
        total = float(below_total)
        features = [below_c / total, below_b / total, below_a / total]
    else:
        features = [0, 0, 0]

    features.extend([
        below_c / float(len(segment_c)),
        below_b / float(len(segment_b)),
        below_a / float(len(segment_a)),
    ])
    return features
예제 #12
0
def extract_features_without_param(time_series, window):
    """
    Extract statistical, classification and fitting features.

    The three groups are concatenated as statistical + classification +
    fitting. No input validation is performed here.

    :param time_series: the time series to extract the features of
    :type time_series: pandas.Series
    :param window: the length of window
    :type window: int
    :return: the value of features
    :return type: list with float
    """
    parts = tsd_common.split_time_series(time_series, window)

    # The alternative split is still evaluated, exactly as before, although
    # its result is not used for the returned features.
    # NOTE(review): confirm it has no required side effect before removing it.
    tsd_common.split_time_series2(time_series, window)

    normalized_parts = tsd_common.normalize_time_series(parts)
    max_min_parts = tsd_common.normalize_time_series_by_max_min(parts)

    # Evaluation order (statistical, fitting, classification) differs from
    # the concatenation order used for the result.
    statistical = statistical_features.get_statistical_features(
        normalized_parts[4])
    fitting = fitting_features.get_fitting_features(normalized_parts)
    classification = classification_features.get_classification_features(
        max_min_parts)

    return statistical + classification + fitting
예제 #13
0
    def _f():
        """
        Yield one single-entry dict per part of the split time series.

        Reads ``x`` from the enclosing scope. The key of each dict is
        ``time_series_window_parts_value_distribution_with_threshold_<i>``
        where ``i`` is the position of the part. The value is the part's
        count of elements below 0.01 divided by ``DEFAULT_WINDOW + 1``, or,
        while no element below the threshold has been seen in any part so
        far, a fixed list of five zero-valued dicts.
        """
        threshold = 0.01
        running_counts = []
        for position, part in enumerate(split_time_series(x, DEFAULT_WINDOW)):
            # Overwrite below-threshold elements with -1, then count the -1s.
            marked = np.array(part)
            marked[marked < threshold] = -1
            below = (marked == -1).sum()
            running_counts.append(below)

            label = "time_series_window_parts_value_distribution_with_threshold_{}".format(position)

            # The check covers all parts processed so far, not only this one.
            if sum(running_counts) == 0:
                payload = [
                    {'time_series_window_parts_value_distribution_with_threshold_Ais0':0},
                    {'time_series_window_parts_value_distribution_with_threshold_bis0':0},
                    {'time_series_window_parts_value_distribution_with_threshold_cis0':0},
                    {'time_series_window_parts_value_distribution_with_threshold_Dis0':0},
                    {'time_series_window_parts_value_distribution_with_threshold_Eis0':0},
                ]
            else:
                payload = below / float(DEFAULT_WINDOW + 1)

            yield {'{}'.format(label): payload}
예제 #14
0
def time_series_daily_parts_value_distribution(x):
    # Author's note (translated): the problem is that the return value ends
    # up being one list per row -- inspect the data returned by the combine
    # step.
    """
    Bucketed value distribution of the segments c, b and a.

    Segment c is part 0 joined with part 1 minus its first element, segment
    b is part 2 joined with part 3 minus its first element, and segment a is
    part 4 of ``split_time_series(x, DEFAULT_WINDOW)``. Each segment is
    histogrammed over fixed bucket edges and the counts are divided by the
    segment length.

    :param x: normalized time series
    :type x: pandas.Series
    :return: the values of this feature (ratios of c, then b, then a)
    :return type: list
    """
    # The repeated final edge 1.0 creates a last bucket [1.0, 1.0].
    bucket_edges = [
        0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99,
        1.0, 1.0,
    ]
    parts = split_time_series(x, DEFAULT_WINDOW)
    segments = [
        parts[0] + parts[1][1:],
        parts[2] + parts[3][1:],
        parts[4],
    ]

    # Histograms first, ratios afterwards, matching the original ordering.
    bucket_counts = [
        list(np.histogram(segment, bins=bucket_edges)[0])
        for segment in segments
    ]

    distribution = []
    for counts, segment in zip(bucket_counts, segments):
        distribution += list(np.array(counts) / float(len(segment)))
    return distribution