def k_means_clust(self, data, num_iter, progress=False, w=100, euclidean=True):
    """k-means clustering for time series data.

    Dynamic time warping (Euclidean-flavoured by default) is the similarity
    measure; the cheap LB_Keogh lower bound is used to skip full DTW
    computations that cannot beat the current best distance.

    Parameters:
        data:      sequence of time series; assumed to support element-wise
                   arithmetic (numpy arrays -- TODO confirm with callers)
        num_iter:  number of k-means iterations to run
        progress:  if True, print the iteration number each round
        w:         LB_Keogh warping-window width
        euclidean: True for Euclidean distance, False for Manhattan

    Side effects: sets self.centroids (list of k centroids) and
    self.assignments ({centroid index: [indices of member series]}).
    """
    # initial centroids: self.num_clust distinct series drawn from the data
    self.centroids = random.sample(data, self.num_clust)
    for n in range(num_iter):
        if progress:
            print('iteration ' + str(n + 1))
        # assignment step: centroid_idx -> [indices of series nearest to it]
        self.assignments = {}
        for ind, series in enumerate(data):
            min_dist = float('inf')
            closest_clust = None
            for c_ind, centroid in enumerate(self.centroids):
                # only pay for a full DTW when the lower bound says this
                # centroid could still beat the current best
                if dtw.lb_keogh_onQuery(series, centroid, w, euclidean) < min_dist:
                    print('lb less than min dist -- calculating full dtw')
                    # could use windowed dtw here instead to speed things up
                    cur_dist = dtw.dist_dtw(series, centroid, euclidean)
                    if cur_dist < min_dist:
                        min_dist = cur_dist
                        closest_clust = c_ind
            # record the assignment.  BUG FIX: the original created an empty
            # list on a cluster's first assignment and never appended `ind`,
            # silently dropping the first member of every cluster.
            self.assignments.setdefault(closest_clust, []).append(ind)
        # update step: move each centroid to the mean of its member series
        for key in self.assignments:
            clust_sum = 0
            for k in self.assignments[key]:
                # element-wise sum -- assumes numpy-array series, TODO confirm
                clust_sum = clust_sum + data[k]
            self.centroids[key] = [m / len(self.assignments[key])
                                   for m in clust_sum]
def dtw(query, subject, squared=True):
    """Unconstrained, Euclidean-flavoured DTW distance between two sequences.

    Both inputs are wrapped as ldtw.TimeSeries before the distance call;
    `squared` is forwarded unchanged to ldtw.dist_dtw.
    """
    wrapped_query = ldtw.TimeSeries(query)
    wrapped_subject = ldtw.TimeSeries(subject)
    return ldtw.dist_dtw(wrapped_query, wrapped_subject, squared)
# Demo: compare distances and warping paths for both metric modes.
# NOTE: 'Manhatten' spelling is kept in the runtime string (used as the
# plot title) to preserve output byte-for-byte.
for mode, name in [(True, 'Euclidean'), (False, 'Manhatten')]:
    # naive L_p-norm as a no-warping baseline
    # (py3-compatible print call; `if mode` instead of `mode == True`)
    if mode:
        print(dtw.dist_euclidean(query, subject))
    else:
        print(dtw.dist_manhatten(query, subject))
    # a bunch of windowed DTWs -- error should decrease monotonically
    # as the window grows
    for window in range(0, max(len(subject), len(query)), 32):
        gamma = dtw.WarpingPath()
        print(dtw.dist_cdtw_backtrace(query, subject, window, gamma, mode))
        # plot the warping path; nodes are reversed so axes are (j, i)
        pl.plot(*zip(*[node[::-1] for node in gamma]))
    # full (unconstrained) DTW
    print(dtw.dist_dtw(query, subject, mode))
    # plot objective function H(i,j) := |query(i)-subject(j)|
    pl.imshow([[abs(query[i] - subject[j]) for j in range(len(subject))]
               for i in range(len(query))], aspect="auto")
    pl.title(name)
    pl.show()
    # draw explicit alignment for the full dtw (every 4th path node)
    pl.plot(query)
    pl.plot(subject)
    for i, j in gamma[0:len(gamma):4]:
        pl.plot([i, j], [query[i], subject[j]], c="grey")
    pl.show()
# Demo: compare distances and warping paths for both metric modes.
# NOTE: 'Manhatten' spelling is kept in the runtime string (used as the
# plot title) to preserve output byte-for-byte.
for mode, name in [(True, 'Euclidean'), (False, 'Manhatten')]:
    # naive L_p-norm as a no-warping baseline
    # (py3-compatible print call; `if mode` instead of `mode == True`)
    if mode:
        print(dtw.dist_euclidean(query, subject))
    else:
        print(dtw.dist_manhatten(query, subject))
    # a bunch of windowed DTWs -- error should decrease monotonically
    # as the window grows
    for window in range(0, max(len(subject), len(query)), 32):
        gamma = dtw.WarpingPath()
        print(dtw.dist_cdtw_backtrace(query, subject, window, gamma, mode))
        # plot the warping path; nodes are reversed so axes are (j, i)
        pl.plot(*zip(*[node[::-1] for node in gamma]))
    # full (unconstrained) DTW
    print(dtw.dist_dtw(query, subject, mode))
    # plot objective function H(i,j) := |query(i)-subject(j)|
    pl.imshow([[abs(query[i] - subject[j]) for j in range(len(subject))]
               for i in range(len(query))], aspect="auto")
    pl.title(name)
    pl.show()
    # draw explicit alignment for the full dtw (every 4th path node)
    pl.plot(query)
    pl.plot(subject)
    for i, j in gamma[0:len(gamma):4]:
        pl.plot([i, j], [query[i], subject[j]], c="grey")
    # BUG FIX: the original omitted this show(), so the alignment figure
    # was never displayed (the sibling variant of this demo has it)
    pl.show()