# Example #1
# 0
import pandas as pd


def DTWDistance(s1, s2, w: float = float("inf")):
    """Return the dynamic-time-warping distance between two sequences.

    The local cost of aligning ``s1[i]`` with ``s2[j]`` is their squared
    difference. The result is the square root of the cheapest cumulative
    cost of a monotone alignment that ends on the last element of both
    sequences.

    Args:
        s1: First indexable sequence of numbers.
        s2: Second indexable sequence of numbers.
        w: Half-width of the warping window; only index pairs with
            ``abs(i - j) <= w`` are considered. It is widened to at least
            ``abs(len(s1) - len(s2))`` so the final cell stays reachable.
            Fractional values are floored. The default (infinity) places
            no constraint on the alignment.

    Returns:
        The DTW distance as a float: ``0.0`` when both sequences are empty
        and ``inf`` when exactly one of them is empty.
    """
    import math

    n, m = len(s1), len(s2)
    w = max(w, abs(n - m))
    # range() only accepts integers; an unbounded window spans every column.
    band = max(n, m) if math.isinf(w) else int(w)

    # cost[i + 1][j + 1] holds the cheapest cumulative cost of aligning
    # s1[: i + 1] with s2[: j + 1]; row 0 and column 0 are the empty-prefix
    # border, which is unreachable except for the origin.
    cost = [[math.inf] * (m + 1) for _ in range(n + 1)]
    cost[0][0] = 0

    for i in range(n):
        # The "+ 1" makes the window inclusive of j == i + band; without it
        # a window of 0 (or one equal to the length difference) never
        # reaches the final cell.
        for j in range(max(0, i - band), min(m, i + band + 1)):
            dist = (s1[i] - s2[j]) ** 2
            cost[i + 1][j + 1] = dist + min(
                cost[i][j + 1], cost[i + 1][j], cost[i][j]
            )

    return math.sqrt(cost[n][m])


if __name__ == "__main__":
    # Take the first ten CSV rows, keep six hand-picked rows and drop the
    # first column before clustering.
    frame = pd.read_csv("data/data1.csv").head(10)
    selected_rows = (0, 1, 3, 5, 6, 8)
    data = np.array(frame)[selected_rows, 1:]
    print(data)
    print(data.shape)

    # Cluster the selected rows with DTW as the pairwise metric, then show
    # the result for two clusters and the dendrogram.
    clust = Cluster(data, metric=DTWDistance)
    clust.print(2)
    clust.dendogram()
def basic_features_extract(data):
    """Run ``extract_features`` on ``data`` and return its result.

    The call identifies each series by the ``"id"`` column and orders the
    observations by the ``"time"`` column.

    NOTE(review): ``extract_features`` is not imported in the visible part
    of the file; the keyword names suggest tsfresh -- confirm.
    """
    feature_table = extract_features(data, column_sort="time", column_id="id")
    return feature_table


def extract_features_from_TS(Data, y):
    """Return the full feature table and the target-relevant feature table.

    Args:
        Data: Time-series container with ``"id"`` and ``"time"`` columns.
        y: Target passed to ``extract_relevant_features``.

    Returns:
        A ``(all_features, relevant_features)`` tuple: the output of
        ``basic_features_extract(Data)`` after ``impute`` has been called on
        it, and the output of ``extract_relevant_features`` run directly on
        ``Data``.
    """
    all_features = basic_features_extract(Data)
    # The return value is discarded, so this relies on ``impute`` modifying
    # its argument in place -- presumably tsfresh's ``impute``; confirm.
    impute(all_features)
    # The relevant features are computed from the raw data rather than by
    # filtering ``all_features``.
    relevant_features = extract_relevant_features(
        Data, y, column_sort="time", column_id="id"
    )
    return all_features, relevant_features


if __name__ == "__main__":
    n_series = 10
    n_clust = 4

    # Stack the three feature files row-wise, keep the first ``n_series``
    # rows, then keep six hand-picked rows of those.
    paths = [f"data/f{i}.csv" for i in range(1, 4)]
    stacked = np.concatenate([np.loadtxt(path) for path in paths], axis=0)
    features = stacked[:n_series]
    features = features[(0, 1, 3, 5, 6, 8), :]
    print(f"Data recive : {features.shape}")

    clust = Cluster(features)
    print("Cluster initialized :)")

    # Plot the length of every item returned by ``clust.get(n_series)``.
    lengths = [len(group) for group in clust.get(n_series)]
    positions = list(range(len(lengths)))
    plt.plot(positions, lengths)
    plt.show()

    clust.print(n_clust)
    clust.dendogram()