def main(tseries_fpath, k, plot_foldpath):
    '''
    Clusters the time series found in a text file with `ksc.inc_ksc` and
    writes the resulting centroids to disk as plots and text tables.

    Arguments
    ---------
    tseries_fpath: str
        File read with `np.genfromtxt`. The first column is discarded, the
        remaining columns of each row form one time series.
    k: int
        Passed as the second argument of `ksc.inc_ksc` (presumably the
        number of clusters -- confirm against `ksc`).
    plot_foldpath: str
        Folder that receives every output file. For centroid `i` the files
        `i.pdf`, `i-peak-center.pdf` and `i-min-first.pdf` are written;
        `cents.dat`, `assign.dat`, `shift.dat` and `dists_cent.dat` hold
        the four values returned by `ksc.inc_ksc`.
    '''
    import mkl
    mkl.set_num_threads(16)

    initialize_matplotlib()

    def save_curve(curve, file_name):
        #both axes are hidden, so only the curve itself is drawn
        plt.plot(curve, '-k')
        for axis in (plt.gca().get_xaxis(), plt.gca().get_yaxis()):
            axis.set_visible(False)
        plt.savefig(os.path.join(plot_foldpath, file_name))
        plt.close()

    series = np.genfromtxt(tseries_fpath)[:, 1:]

    #rows that sum to zero get a small constant added to every point
    zero_rows = np.where(series.sum(axis=1) == 0)[0]
    series[zero_rows] += .001

    #the clustering gets its own array instead of the column-sliced view
    series = series.copy()

    cent, assign, shift, dists_cent = ksc.inc_ksc(series, k)

    for i, centroid in enumerate(cent):
        #centroid exactly as returned
        save_curve(centroid, '%d.pdf' % i)

        #centroid rolled so that its maximum sits at the middle position
        middle = centroid.shape[0] // 2
        peak_centered = dist.shift(centroid, middle - np.argmax(centroid),
                                   rolling=True)
        save_curve(peak_centered, '%d-peak-center.pdf' % i)

        #centroid rolled so that its minimum sits at the first position
        min_first = dist.shift(centroid, 0 - np.argmin(centroid),
                               rolling=True)
        save_curve(min_first, '%d-min-first.pdf' % i)

    tables = (
        ('cents.dat', cent, '%.5f'),
        ('assign.dat', assign, '%d'),
        ('shift.dat', shift, '%d'),
        ('dists_cent.dat', dists_cent, '%.5f'),
    )
    for file_name, values, number_format in tables:
        np.savetxt(os.path.join(plot_foldpath, file_name), values,
                   fmt=number_format)
def plot_series(t_series, plot_foldpath, name, shift=False):
    '''
    Draws a single time series as a black line and saves it as a PNG.

    Arguments
    ---------
    t_series: array
        The values to plot.
    plot_foldpath: str
        Folder in which the image is written.
    name: str
        File name without extension; the image is saved as `<name>.png`.
    shift (optional): bool
        When true, the series is first rolled with `dist.shift` so that its
        minimum value becomes the first point.
    '''
    if shift:
        curve = dist.shift(t_series, -np.argmin(t_series), rolling=True)
    else:
        curve = t_series

    plt.plot(curve, '-k')
    plt.ylabel('Views')
    plt.xlabel('Time')

    out_fpath = os.path.join(plot_foldpath, '%s.png' % name)
    plt.savefig(out_fpath)
    plt.close()
def _compute_centroids(tseries, assign, num_clusters, to_shift=None):
    '''
    Given a time series matrix and cluster assignments, this method will
    compute the spectral centroids for each cluster.

    Arguments
    ---------
    tseries: matrix (n_series, n_points)
        Time series being clustered. When `to_shift` is given, the rows of
        this matrix are overwritten in place with their shifted versions.
    assign: array of ints (size = n_series)
        The cluster assignment for each time series
    num_clusters: int
        The number of clusters being searched for
    to_shift (optional): array of ints (size = n_series)
        Determines if time series should be shifted, if different from `None`.
        In this case, each series will be shifted by the corresponding amount
        in the array.

    Returns
    -------
    centroids: matrix (num_clusters, n_points)
        One centroid per row. Clusters without members (or whose members are
        all zero) get a row of zeros. Every other centroid is the eigenvector
        of the smallest eigenvalue of the cluster matrix, negated when needed
        so that its entries do not sum to a negative value.
    '''

    series_size = tseries.shape[1]

    #every row is filled inside the loop below, no initialization is needed
    centroids = np.ndarray((num_clusters, series_size))

    #shift series for best centroid distance
    #TODO: this method can be cythonized and done in parallel
    #NOTE: `shifted` is the same object as `tseries`, the caller's matrix is
    #      modified. Kept as is since callers may depend on it.
    shifted = tseries
    if to_shift is not None:
        for i in range(tseries.shape[0]):
            shifted[i] = shift(tseries[i], to_shift[i], rolling=True)

    #compute centroids
    for k in range(num_clusters):
        members = shifted[assign == k]
        if members.any():
            if members.ndim == 2:
                axis = 1
                num_members = members.shape[0]
            else:
                axis = 0
                num_members = 1

            ssqs = np.tile(np.sum(members**2, axis=axis), (series_size, 1))

            #the original papers divides by ssqs only, while the author's
            #example implementation uses sqrt. We chose sqrt because it appears
            #to yield better centroids.
            aux = members / np.sqrt(ssqs.T)
            x_mat = np.dot(aux.T, aux)
            i_mat = num_members * np.eye(series_size)
            m_mat = i_mat - x_mat

            #only the eigenvector of the smallest eigenvalue is requested.
            #`LA` is expected to be scipy.linalg: recent SciPy versions select
            #the range with `subset_by_index`, versions older than 1.5 only
            #know the legacy `eigvals` keyword.
            try:
                _, eig_vectors = LA.eigh(m_mat, subset_by_index=[0, 0])
            except TypeError:
                _, eig_vectors = LA.eigh(m_mat, eigvals=(0, 0))

            centroids[k] = eig_vectors[:, 0]

            #an eigenvector is defined up to its sign, pick the one whose
            #entries do not sum to a negative value
            if centroids[k].sum() < 0:
                centroids[k] = -centroids[k]
        else:
            centroids[k] = np.zeros(series_size)

    return centroids
def test_shift_drop(self):
    '''
    Checks `dist.shift` with rolling disabled: values pushed past either end
    are dropped and the vacated positions are filled with zeros.
    '''
    #single element: only a shift of zero keeps the value
    array = np.array([1.0])
    assert_array_equal(np.array([1.0]), dist.shift(array, 0, False))
    for amount in (1, -1, 2, -2):
        assert_array_equal(np.array([0.0]), dist.shift(array, amount, False))

    #positive amounts move values to the right, negative ones to the left;
    #amounts at least as large as the array leave nothing but zeros
    array = np.array([1.0, 2.0, 3.0, 4.0])
    cases = [
        (0, [1.0, 2.0, 3.0, 4.0]),
        (1, [0.0, 1.0, 2.0, 3.0]),
        (-1, [2.0, 3.0, 4.0, 0.0]),
        (2, [0.0, 0.0, 1.0, 2.0]),
        (-2, [3.0, 4.0, 0.0, 0.0]),
        (3, [0.0, 0.0, 0.0, 1.0]),
        (-3, [4.0, 0.0, 0.0, 0.0]),
        (4, [0.0, 0.0, 0.0, 0.0]),
        (-4, [0.0, 0.0, 0.0, 0.0]),
        (5, [0.0, 0.0, 0.0, 0.0]),
        (-5, [0.0, 0.0, 0.0, 0.0]),
        (50, [0.0, 0.0, 0.0, 0.0]),
        (-50, [0.0, 0.0, 0.0, 0.0]),
    ]
    for amount, expected in cases:
        assert_array_equal(np.array(expected),
                           dist.shift(array, amount, False))
def test_shift_roll(self):
    '''
    Checks `dist.shift` with rolling enabled: values pushed past one end
    re-enter at the other end, so no value is ever lost.
    '''
    #empty input stays empty for any amount (rolling argument left at its
    #default here)
    array = np.array([])
    for amount in (0, -1, 1, 10, -10):
        assert_array_equal(np.array([]), dist.shift(array, amount))

    #single element: rolling by any amount gives the same array back
    array = np.array([1.0])
    for amount in (0, 1, -1, 2, -2):
        assert_array_equal(np.array([1.0]), dist.shift(array, amount, True))

    #positive amounts roll to the right, negative ones to the left; amounts
    #wrap around the array length
    array = np.array([1.0, 2.0, 3.0, 4.0])
    cases = [
        (0, [1.0, 2.0, 3.0, 4.0]),
        (1, [4.0, 1.0, 2.0, 3.0]),
        (-1, [2.0, 3.0, 4.0, 1.0]),
        (2, [3.0, 4.0, 1.0, 2.0]),
        (-2, [3.0, 4.0, 1.0, 2.0]),
        (3, [2.0, 3.0, 4.0, 1.0]),
        (-3, [4.0, 1.0, 2.0, 3.0]),
        (4, [1.0, 2.0, 3.0, 4.0]),
        (-4, [1.0, 2.0, 3.0, 4.0]),
        (5, [4.0, 1.0, 2.0, 3.0]),
        (-5, [2.0, 3.0, 4.0, 1.0]),
        (8, [1.0, 2.0, 3.0, 4.0]),
        (-8, [1.0, 2.0, 3.0, 4.0]),
    ]
    for amount, expected in cases:
        assert_array_equal(np.array(expected),
                           dist.shift(array, amount, True))
def _compute_centroids(tseries, assign, num_clusters, to_shift=None):
    '''
    Given a time series matrix and cluster assignments, this method will
    compute the spectral centroids for each cluster.

    Arguments
    ---------
    tseries: matrix (n_series, n_points)
        Time series being clustered. When `to_shift` is given, the rows of
        this matrix are overwritten in place with their shifted versions.
    assign: array of ints (size = n_series)
        The cluster assignment for each time series
    num_clusters: int
        The number of clusters being searched for
    to_shift (optional): array of ints (size = n_series)
        Determines if time series should be shifted, if different from `None`.
        In this case, each series will be shifted by the corresponding amount
        in the array.

    Returns
    -------
    centroids: matrix (num_clusters, n_points)
        One centroid per row. Clusters without members (or whose members are
        all zero) get a row of zeros. Every other centroid is the eigenvector
        of the smallest eigenvalue of the cluster matrix, negated when needed
        so that its entries do not sum to a negative value.
    '''

    series_size = tseries.shape[1]

    #every row is filled inside the loop below, no initialization is needed
    centroids = np.ndarray((num_clusters, series_size))

    #shift series for best centroid distance
    #TODO: this method can be cythonized and done in parallel
    #NOTE: `shifted` is the same object as `tseries`, the caller's matrix is
    #      modified. Kept as is since callers may depend on it.
    shifted = tseries
    if to_shift is not None:
        for i in range(tseries.shape[0]):
            shifted[i] = shift(tseries[i], to_shift[i], rolling=True)

    #compute centroids
    for k in range(num_clusters):
        members = shifted[assign == k]
        if members.any():
            if members.ndim == 2:
                axis = 1
                num_members = members.shape[0]
            else:
                axis = 0
                num_members = 1

            ssqs = np.tile(np.sum(members**2, axis=axis), (series_size, 1))

            #the original papers divides by ssqs only, while the author's
            #example implementation uses sqrt. We chose sqrt because it appears
            #to yield better centroids.
            aux = members / np.sqrt(ssqs.T)
            x_mat = np.dot(aux.T, aux)
            i_mat = num_members * np.eye(series_size)
            m_mat = i_mat - x_mat

            #only the eigenvector of the smallest eigenvalue is requested.
            #`LA` is expected to be scipy.linalg: recent SciPy versions select
            #the range with `subset_by_index`, versions older than 1.5 only
            #know the legacy `eigvals` keyword.
            try:
                _, eig_vectors = LA.eigh(m_mat, subset_by_index=[0, 0])
            except TypeError:
                _, eig_vectors = LA.eigh(m_mat, eigvals=(0, 0))

            centroids[k] = eig_vectors[:, 0]

            #an eigenvector is defined up to its sign, pick the one whose
            #entries do not sum to a negative value
            if centroids[k].sum() < 0:
                centroids[k] = -centroids[k]
        else:
            centroids[k] = np.zeros(series_size)

    return centroids
def test_shift_roll(self):
    '''
    Checks `dist.shift` with rolling enabled: values pushed past one end
    re-enter at the other end, so no value is ever lost.
    '''
    #empty input stays empty for any amount (rolling argument left at its
    #default here)
    empty = np.array([])
    for amount in (0, -1, 1, 10, -10):
        assert_array_equal(np.array([]), dist.shift(empty, amount))

    #single element: rolling by any amount gives the same array back
    single = np.array([1.0])
    for amount in (0, 1, -1, 2, -2):
        assert_array_equal(np.array([1.0]), dist.shift(single, amount, True))

    #positive amounts roll to the right, negative ones to the left; amounts
    #wrap around the array length
    array = np.array([1.0, 2.0, 3.0, 4.0])
    expected_by_amount = [
        (0, [1.0, 2.0, 3.0, 4.0]),
        (1, [4.0, 1.0, 2.0, 3.0]),
        (-1, [2.0, 3.0, 4.0, 1.0]),
        (2, [3.0, 4.0, 1.0, 2.0]),
        (-2, [3.0, 4.0, 1.0, 2.0]),
        (3, [2.0, 3.0, 4.0, 1.0]),
        (-3, [4.0, 1.0, 2.0, 3.0]),
        (4, [1.0, 2.0, 3.0, 4.0]),
        (-4, [1.0, 2.0, 3.0, 4.0]),
        (5, [4.0, 1.0, 2.0, 3.0]),
        (-5, [2.0, 3.0, 4.0, 1.0]),
        (8, [1.0, 2.0, 3.0, 4.0]),
        (-8, [1.0, 2.0, 3.0, 4.0]),
    ]
    for amount, expected in expected_by_amount:
        assert_array_equal(np.array(expected),
                           dist.shift(array, amount, True))