Esempio n. 1
0
def test_clustering_tree(directory=None):
    """Fit a hierarchical clustering tree on seven short series and export it.

    A ``clustering.Hierarchical`` model (DTW distance matrix, with a merge
    hook) is wrapped in ``clustering.HierarchicalTree`` and fitted. The hook
    asserts that every ``(from_idx, to_idx)`` pair it receives is one of the
    expected merges, and the first returned cluster must contain all seven
    series indices. The tree is then plotted to a PNG file and written out
    in dot format.

    :param directory: Directory that receives ``hierarchy.png`` and
        ``hierarchy.dot``. When falsy, the two files are instead named after
        a freshly generated temporary file name.
    """
    s = np.array([[0., 0, 1, 2, 1, 0, 1, 0, 0], [0., 1, 2, 0, 0, 0, 0, 0, 0],
                  [1., 2, 0, 0, 0, 0, 0, 1, 1], [0., 0, 1, 2, 1, 0, 1, 0, 0],
                  [0., 1, 2, 0, 0, 0, 0, 0, 0], [1., 2, 0, 0, 0, 0, 0, 1, 1],
                  [1., 2, 0, 0, 0, 0, 0, 1, 1]])

    # Built once instead of on every hook invocation.
    expected_merges = ((3, 0), (4, 1), (5, 2), (6, 2), (1, 0), (2, 0))

    def test_hook(from_idx, to_idx, distance):
        # `distance` is part of the hook signature but is not checked here.
        assert (from_idx, to_idx) in expected_merges

    model = clustering.Hierarchical(dtw.distance_matrix_fast, {},
                                    merge_hook=test_hook,
                                    show_progress=False)
    modelw = clustering.HierarchicalTree(model)
    cluster_idx = modelw.fit(s)
    assert cluster_idx[0] == {0, 1, 2, 3, 4, 5, 6}

    if directory:
        hierarchy_fn = os.path.join(directory, "hierarchy.png")
        graphviz_fn = os.path.join(directory, "hierarchy.dot")
    else:
        # Only the unique name is needed as a prefix, so release the
        # placeholder file immediately instead of leaving its handle open
        # until the garbage collector gets to it.
        with tempfile.NamedTemporaryFile() as tmp_file:
            prefix = tmp_file.name
        hierarchy_fn = prefix + "_hierarchy.png"
        graphviz_fn = prefix + "_hierarchy.dot"

    modelw.plot(hierarchy_fn)
    print("Figure saved to", hierarchy_fn)
    with open(graphviz_fn, "w") as ofile:
        print(modelw.to_dot(), file=ofile)
    print("Dot saved to", graphviz_fn)
def main():
    """Cluster seven example series and display the resulting hierarchy plot.

    The series are fitted first with a plain ``clustering.Hierarchical``
    model and then again through a ``clustering.HierarchicalTree`` wrapper
    around that model. The wrapper's plot is written to ``hierarchy.jpg``,
    which is then read back and passed to ``plt.imshow``.
    """
    pattern_a = [0., 0, 1, 2, 1, 0, 1, 0, 0, 1]
    pattern_b = [0., 1, 2, 0, 0, 0, 0, 0, 0, 1]
    pattern_c = [1., 2, 0, 0, 0, 0, 0, 1, 1, 1]

    series = np.array([
        np.flip(pattern_a),
        pattern_b,
        np.flip(pattern_c, 0),
        pattern_a,
        pattern_b,
        np.flip(pattern_c, 0),
        np.flip(pattern_c, 0),
    ])

    # Plain hierarchical clustering on the DTW distance matrix.
    base_model = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    base_model.fit(series)

    # The HierarchicalTree wrapper keeps track of the full merge tree.
    tree_model = clustering.HierarchicalTree(base_model)
    tree_model.fit(series)

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

    def show_ts_label(idx):
        return "ts-" + str(idx)

    plot_options = dict(
        axes=axes,
        show_ts_label=show_ts_label,
        show_tr_label=True,
        ts_label_margin=-10,
        ts_left_margin=10,
        ts_sample_length=1,
    )
    tree_model.plot('hierarchy.jpg', **plot_options)

    # Read the JPEG that was just written and hand it to imshow.
    image = img.imread('hierarchy.jpg')
    plt.imshow(image)
Esempio n. 3
0
def get_cluster():
    """
    Cluster the time series of one operation/program pair by DTW distance.

    Segment boundaries are read from the auxiliary CSV and samples from the
    2018 data CSV. For every segment whose row matches the hard-coded
    operation number and program number, the SPINDLE_LOAD values whose DATE
    lies inside the segment (bounds inclusive) form one series; segments
    without samples are skipped and iteration stops at the first index label
    above YEAR_INDEX_LIMIT. The collected series are fitted with a
    Hierarchical model, then a HierarchicalTree and a LinkageTree are fitted
    and plotted; an exception raised by either tree step is printed and
    otherwise ignored.
    """
    aux_file_path = r'C:\TFM\auxdata\hist_protected.csv'
    data_path = r'C:\TFM\data\2018\2018.csv'

    hierarchical_plot = r'C:\TFM\dtw\hierarchical_cluster.png'
    linkage_plot = r'C:\TFM\dtw\linkage_cluster.png'

    op_no = 28
    program_number = 1108805036

    df_aux = pd.read_csv(aux_file_path,
                         header=0,
                         delimiter=',',
                         parse_dates=[SEGMENT_BEGIN, SEGMENT_END])
    df_data = pd.read_csv(data_path,
                          header=0,
                          delimiter=',',
                          parse_dates=[DATE])

    # Select the segments of the requested operation/program once and take
    # both boundary columns from that selection.
    wanted = ((df_aux[OPERATION_ID_NUMBER] == op_no)
              & (df_aux[PROGRAM_NAME] == program_number))
    segments = df_aux[wanted]
    begin_date = segments[SEGMENT_BEGIN]
    end_date = segments[SEGMENT_END]

    series = []
    for item in begin_date.index:
        if item > YEAR_INDEX_LIMIT:
            break

        in_segment = ((df_data[DATE] >= begin_date[item])
                      & (df_data[DATE] <= end_date[item]))
        aux_series = df_data.loc[in_segment]
        if aux_series.empty:
            continue

        series.append(np.array(aux_series[SPINDLE_LOAD]))

    # Custom hierarchical clustering
    model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    model1.fit(series)

    # Wrap the Hierarchical object so the full tree is tracked, then plot it.
    try:
        model2 = clustering.HierarchicalTree(model1)
        model2.fit(series)
        model2.plot(hierarchical_plot, show_tr_label=True)
    except Exception as ex:
        print(ex)

    # SciPy linkage clustering
    try:
        model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})
        model3.fit(series)
        model3.plot(linkage_plot, show_tr_label=True)
    except Exception as ex:
        print(ex)
# Column labels of the input frame (the machine names).
head = list(df.columns.values)
print("head", head)

# Transpose so that each original column becomes one row, then continue with
# the underlying array instead of the DataFrame.
df = df.T.values

# Pairwise DTW distance matrix; infinite entries are replaced with 0.
ds = dtw.distance_matrix_fast(df)
ds[ds == inf] = 0

# Keep a copy of the distance matrix as a spreadsheet.
pd.DataFrame(ds).to_excel("ds.xlsx")

# Clustering models:
#   model1 - custom hierarchical clustering
#   model2 - wrapper around model1 that keeps track of the full tree
#   model3 - SciPy linkage clustering
model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
model2 = clustering.HierarchicalTree(model1)
model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})

# Only the linkage tree is fitted and plotted here.
cluster_idx = model3.fit(df)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 15))
model3.plot(
    "hierarchy.png",
    axes=ax,
    show_ts_label=head,
    show_tr_label=True,
    ts_label_margin=-10,
    ts_left_margin=10,
    ts_sample_length=1,
)

# Set-up for finding the number of clusters.
NumberOfClusters = range(2, 30)
silhouette_score_values = []
Esempio n. 5
0
def _zscore_series(values):
    """Convert *values* to floats and return their z-score normalised array."""
    return stats.zscore(np.array([float(v) for v in values]))


def cluster(time_series_set, name):
    """Cluster the stored series (plus a new one) and plot the hierarchy.

    Every row of ``./static/cluster_data.csv`` is read as ``name, v1, v2,
    ...``; the values are converted to floats and z-score normalised. If
    ``name`` is not among the stored names, the series taken from
    ``time_series_set`` is appended to the CSV file and added to the set
    being clustered. The series are then fitted with the dtaidistance
    models and the ``HierarchicalTree`` plot is written to ``hierarchy.png``
    with the stored names as labels.

    :param time_series_set: Iterable of rows; the element at index 1 of each
        row is used as one sample of the new series.
    :param name: Label of the new series; it becomes the first column of
        the appended CSV row.
    :raises ValueError: If a stored or new sample cannot be converted to
        float. A new series is validated before it is written, so a bad
        value no longer ends up in the data file.
    """
    path = "./static/cluster_data.csv"

    name_list = []
    series_list = []

    # The csv module expects files opened with newline=''; the context
    # manager closes the handle that was previously left open.
    with open(path, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines (e.g. left by earlier appends made without
                # newline='' on Windows) carry no series.
                continue
            name_list.append(row[0])
            series_list.append(_zscore_series(row[1:]))

    if name not in name_list:
        # time_series is the performance-metric sequence
        time_series = [row[1] for row in time_series_set]

        # Normalise first: a non-numeric sample raises here, before the row
        # is persisted and would break every later read of the file.
        normalised = _zscore_series(time_series)

        with open(path, 'a', newline='') as f:
            csv.writer(f).writerow([name] + time_series)

        name_list.append(name)
        series_list.append(normalised)

    # NOTE(review): only model2 is plotted below; the standalone fits of
    # model1 and model3 look redundant but are kept so the call sequence is
    # unchanged - confirm before removing.
    # Custom hierarchical clustering
    model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    model1.fit(series_list)
    # Wrapper around model1 that keeps track of the full tree
    model2 = clustering.HierarchicalTree(model1)
    model2.fit(series_list)
    # SciPy linkage clustering
    model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    model3.fit(series_list)

    _, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

    def show_ts_label(idx):
        return name_list[idx]

    model2.plot("hierarchy.png",
                axes=ax,
                show_ts_label=show_ts_label,
                show_tr_label=True,
                ts_label_margin=-10,
                ts_left_margin=10,
                ts_sample_length=1)