Exemple #1
0
    def test_all(self):
        # Table-driven check of dhwt.transform / dhwt.inverse.
        # Each row is (input signal, expected transform(...)[0],
        # expected transform(...)[1]).  The fixture values are consistent
        # with pairwise means and pairwise half-differences (first minus
        # second); the odd-length rows look like the trailing sample is
        # paired with zero -- confirm against dhwt itself.
        cases = [
            # empty input
            (np.array([]),
             np.array([]),
             np.array([])),
            # a single pair
            (np.array([1., 1]),
             np.array([1.]),
             np.array([0.])),
            # even length
            (np.array([1., 2, 3, 0]),
             np.array([1.5, 1.5]),
             np.array([-.5, 1.5])),
            # odd length
            (np.array([1., 2, 3, 0, 7]),
             np.array([1.5, 1.5, 3.5]),
             np.array([-.5, 1.5, 3.5])),
            # longer even length
            (np.array([6., 12, 15, 15, 14, 12, 120, 116]),
             np.array([9., 15, 13, 118]),
             np.array([-3, 0, 1, 2])),
            # longer odd length
            (np.array([6., 12, 15, 15, 14, 12, 120, 116, 2]),
             np.array([9., 15, 13, 118, 1]),
             np.array([-3, 0, 1, 2, 1])),
        ]

        for signal, want_first, want_second in cases:
            # transform is invoked afresh for every assertion so that no
            # check depends on a result cached by a previous one.
            assert_array_equal(want_first, dhwt.transform(signal)[0])
            assert_array_equal(want_second, dhwt.transform(signal)[1])
            # Round trip: inverse(transform(x)) must give back x.
            assert_array_equal(signal,
                               dhwt.inverse(*dhwt.transform(signal)))
Exemple #2
0
def inc_ksc(tseries, num_clusters, n_iters=-1, num_wavelets=2):
    '''
    Incremental KSC: cluster the time series from coarse to fine.

    The method applies `num_wavelets` successive Discrete Haar Wavelet
    Transforms to the time series, keeping the first output of `transform`
    at every step. Each transform decreases the number of points per series,
    so every level is a coarser view of the one before it.

    Clustering begins at the coarsest level (the last transform), which is
    the only one initialized randomly. Every following, finer level is
    initialized from the assignments and shifts found at the level before
    it, finishing with the original series. This technique can improve the
    run-time of the KSC algorithm, since clustering is faster on fewer data
    points and the result of one level is already a close approximation for
    the next. See [1] for details.

    Please refer to the documentation of `_base_ksc` for a detailed summary
    of the KSC algorithm.

    Arguments
    ---------
    tseries: a matrix of shape (number of time series, size of each series)
        The time series to cluster
    num_clusters: int
        The number of clusters; the random initial assignment draws ids
        from [0, num_clusters)
    n_iters: int
        The number of iterations which the algorithm will run; forwarded
        unchanged to `_base_ksc` at every level
    num_wavelets: int
        The number of successive wavelet transforms (levels) to compute

    Returns
    -------
    centroids: a matrix of shape (num. of clusters, size of time series)
        The final centroids found by the algorithm
    assign: an array of num. series size
        The cluster id which each time series belongs to
    best_shift: an array of num. series size
        The amount of shift performed for each time series
    cent_dists: a matrix of shape (num. centroids, num. series)
        The distance of each centroid to each time series

    References
    ----------
    .. [1] J. Yang and J. Leskovec,
       "Patterns of Temporal Variation in Online Media" - WSDM'11
       http://dl.acm.org/citation.cfm?id=1935863
    '''

    num_series = tseries.shape[0]

    # dhw_series[0] is the original data; every later entry is built from
    # the entry right before it, one row at a time.
    dhw_series = [tseries]
    # `range` rather than `xrange`: the latter only exists on Python 2.
    for _ in range(num_wavelets):
        previous = dhw_series[-1]
        coarser = [transform(previous[j])[0] for j in range(num_series)]
        dhw_series.append(np.array(coarser))

    # Random start, consumed only by the first (coarsest) level below.
    assign = np.random.randint(0, num_clusters, num_series)
    series_shift = None
    # dhw_series is never empty, so the loop body runs at least once and
    # all four returned names are always bound.
    for dhw in reversed(dhw_series):
        # NOTE(review): series_shift was computed on the previous, coarser
        # level -- confirm `_compute_centroids` expects shifts at that scale.
        cents = _compute_centroids(dhw, assign, num_clusters, series_shift)
        cents, assign, series_shift, dists = _base_ksc(dhw, cents, n_iters)

    return cents, assign, series_shift, dists
Exemple #3
0
def inc_ksc(tseries, num_clusters, n_iters=-1, num_wavelets=2):
    '''
    Run KSC over a stack of progressively shorter versions of the series.

    Starting from `tseries`, `num_wavelets` Discrete Haar Wavelet Transforms
    are applied one after the other; after each one only the first output of
    `transform` is kept, so every level of the stack has fewer points per
    series than the level it was computed from.

    The stack is then clustered from its last level (fewest points) back to
    the original series. The first level processed starts from a random
    assignment; each later level starts from the assignment and shifts that
    the level before it produced. Clustering on fewer points is faster, and
    its result is a close approximation for the next level, which can
    improve the run-time of KSC. See [1] for details.

    The KSC algorithm itself is documented in `_base_ksc`.

    Arguments
    ---------
    tseries: a matrix of shape (number of time series, size of each series)
        The time series to cluster
    num_clusters: int
        Number of clusters; initial cluster ids are drawn at random from
        [0, num_clusters)
    n_iters: int
        The number of iterations which the algorithm will run (handed to
        `_base_ksc` for every level)
    num_wavelets: int
        How many successive wavelet transforms to stack

    Returns
    -------
    centroids: a matrix of shape (num. of clusters, size of time series)
        The final centroids found by the algorithm
    assign: an array of num. series size
        The cluster id which each time series belongs to
    best_shift: an array of num. series size
        The shift applied to each time series
    cent_dists: a matrix of shape (num. centroids, num. series)
        The distance of each centroid to each time series

    References
    ----------
    .. [1] J. Yang and J. Leskovec,
       "Patterns of Temporal Variation in Online Media" - WSDM'11
       http://dl.acm.org/citation.cfm?id=1935863
    '''

    # Level 0 is the untouched input; level k is derived from level k - 1.
    pyramid = [tseries]
    for _ in range(num_wavelets):
        source = pyramid[-1]
        pyramid.append(np.array(
            [transform(source[row])[0] for row in range(tseries.shape[0])]
        ))

    # Only the first level visited below sees this random assignment.
    labels = np.random.randint(0, num_clusters, tseries.shape[0])
    centroids = None
    shifts = None
    # Walk the stack backwards: last transform first, original data last.
    for level in pyramid[::-1]:
        centroids = _compute_centroids(level, labels, num_clusters, shifts)
        centroids, labels, shifts, dists = _base_ksc(level, centroids,
                                                     n_iters)

    return centroids, labels, shifts, dists