Example #1
0
def ksc(tseries, num_clusters, n_iters=-1, n_runs=10):
    '''
    Runs the KSC algorithm `n_runs` times and keeps the cheapest clustering.

    This method will make `n_runs` calls to `_base_ksc`, returning the results
    from the run with the lowest overall clustering cost. In each run,
    a random initialization of centroids is performed. This is done by assigning
    time series to clusters in a uniform random manner and then computing the
    centroid of each cluster.

    Please refer to the documentation of `_base_ksc` for a detailed summary
    of the KSC algorithm.

    Arguments
    ---------
    tseries: a matrix of shape (number of time series, size of each series)
        The time series to cluster
    num_clusters: int
        The number of clusters; random initial cluster ids are drawn from
        the range [0, num_clusters)
    n_iters: int
        The number of iterations which the algorithm will run. The value is
        forwarded unchanged to `_base_ksc`
    n_runs: int
        The number of times to run the KSC algorithm

    Returns
    -------
    centroids: a matrix of shape (num. of clusters, size of time series)
        The final centroids found by the algorithm
    assign: an array of num. series size
        The cluster id which each time series belongs to
    best_shift: an array of num. series size
        The amount of shift performed for each time series
    cent_dists: a matrix of shape (num. centroids, num. series)
        The distance of each centroid to each time series

    Notes
    -----
    If `n_runs` is smaller than 1 no run is performed and all four returned
    values are `None`. When two runs have the same cost, the earlier one is
    kept because only a strictly lower cost replaces the current best.

    References
    ----------
    .. [1] J. Yang and J. Leskovec,
       "Patterns of Temporal Variation in Online Media" - WSDM'11
       http://dl.acm.org/citation.cfm?id=1935863
    '''

    min_cost = float('+inf')

    best_cents = None
    best_assign = None
    best_shift = None
    best_dist = None

    # `range` (rather than the Python 2 only `xrange`) keeps this function
    # usable on both Python 2 and Python 3.
    for _ in range(n_runs):
        # Random initialization: give every series a uniformly random cluster
        # id and derive the starting centroids from that assignment.
        assign = np.random.randint(0, num_clusters, tseries.shape[0])
        cents = _compute_centroids(tseries, assign, num_clusters)

        cents, assign, series_shift, dists = _base_ksc(tseries, cents, n_iters)
        clust_cost = cost(tseries, assign, cents, dists)

        # Strict comparison: ties keep the earlier run.
        if clust_cost < min_cost:
            min_cost = clust_cost
            best_cents = cents
            best_assign = assign
            best_shift = series_shift
            best_dist = dists

    return best_cents, best_assign, best_shift, best_dist
Example #2
0
def ksc(tseries, num_clusters, n_iters=-1, n_runs=10):
    '''
    Repeats the KSC clustering `n_runs` times from random starting points
    and returns the outcome of the run whose clustering cost is the lowest.

    Every run starts by giving each time series a cluster id drawn uniformly
    at random, computing the centroid of each resulting cluster and handing
    those centroids to `_base_ksc`. See the documentation of `_base_ksc` for
    a detailed description of the KSC algorithm itself.

    Arguments
    ---------
    tseries: a matrix of shape (number of time series, size of each series)
        The time series to cluster
    num_clusters: int
        The number of clusters; random initial cluster ids are drawn from
        the range [0, num_clusters)
    n_iters: int
        The number of iterations which the algorithm will run (forwarded
        to `_base_ksc`)
    n_runs: int
        The number of times to run the KSC algorithm

    Returns
    -------
    centroids: a matrix of shape (num. of clusters, size of time series)
        The final centroids found by the algorithm
    assign: an array of num. series size
        The cluster id which each time series belongs to
    best_shift: an array of num. series size
        The amount of shift performed for each time series
    cent_dists: a matrix of shape (num. centroids, num. series)
        The distance of each centroid to each time series

    All four values are `None` when no run is performed (`n_runs` < 1).

    References
    ----------
    .. [1] J. Yang and J. Leskovec,
       "Patterns of Temporal Variation in Online Media" - WSDM'11
       http://dl.acm.org/citation.cfm?id=1935863
    '''

    lowest_cost = float('+inf')

    # (centroids, assignment, shifts, distances) of the cheapest run so far.
    winner = (None, None, None, None)

    for _ in range(n_runs):
        # Uniformly random cluster id per series -> initial centroids.
        start_assign = np.random.randint(0, num_clusters, tseries.shape[0])
        start_cents = _compute_centroids(tseries, start_assign, num_clusters)

        cents, assign, series_shift, dists = \
            _base_ksc(tseries, start_cents, n_iters)
        run_cost = cost(tseries, assign, cents, dists)

        # Only a strictly cheaper run replaces the current winner.
        if not run_cost < lowest_cost:
            continue

        lowest_cost = run_cost
        winner = (cents, assign, series_shift, dists)

    return winner
Example #3
0
def run_clustering(X, k, dists_all):
    '''
    Clusters `X` with `ksc.inc_ksc` and scores the resulting assignment.

    Arguments
    ---------
    X:
        The data handed to `ksc.inc_ksc` and to every metric
    k:
        Second argument of `ksc.inc_ksc` (presumably the number of
        clusters - confirm against `inc_ksc`)
    dists_all:
        Distances forwarded to the `metrics` functions (presumably the
        precomputed distances between all pairs of series - confirm)

    Returns
    -------
    A tuple `(intra, inter, bcv, cost)` holding, in this order: the first
    element returned by `metrics.avg_intra_dist`, the first element returned
    by `metrics.avg_inter_dist`, the value of `metrics.beta_cv` and the value
    of `metrics.cost`.
    '''
    # Only the assignment and the centroid distances are needed for scoring;
    # the centroids and the shifts returned by inc_ksc are discarded.
    _, labels, _, centroid_dists = ksc.inc_ksc(X, k)

    return (
        metrics.avg_intra_dist(X, labels, dists_all)[0],
        metrics.avg_inter_dist(X, labels, dists_all)[0],
        metrics.beta_cv(X, labels, dists_all),
        # The centroids argument is deliberately passed as None here.
        metrics.cost(X, labels, None, centroid_dists),
    )
Example #4
0
def run_clustering(X, k, dists_all):
    '''
    Runs `ksc.inc_ksc(X, k)` once and evaluates the clustering it produces.

    Arguments
    ---------
    X:
        The input passed to `ksc.inc_ksc` and to each `metrics` function
    k:
        Forwarded to `ksc.inc_ksc` (looks like the number of clusters -
        verify against `inc_ksc`)
    dists_all:
        Forwarded to `metrics.avg_intra_dist`, `metrics.avg_inter_dist` and
        `metrics.beta_cv` (looks like all-pairs distances - verify)

    Returns
    -------
    intra:
        Element 0 of the result of `metrics.avg_intra_dist`
    inter:
        Element 0 of the result of `metrics.avg_inter_dist`
    bcv:
        The result of `metrics.beta_cv`
    cost:
        The result of `metrics.cost`, computed from the centroid distances
        returned by `inc_ksc`
    '''
    clustering = ksc.inc_ksc(X, k)

    # inc_ksc yields four values; the first and third are not used below.
    _cents, membership, _shifts, to_centroid = clustering

    # Metrics based on the distances supplied by the caller.
    intra_dist = metrics.avg_intra_dist(X, membership, dists_all)[0]
    inter_dist = metrics.avg_inter_dist(X, membership, dists_all)[0]
    beta_cv = metrics.beta_cv(X, membership, dists_all)

    # Cost is computed from the distances to the centroids; the centroids
    # themselves are passed as None.
    total_cost = metrics.cost(X, membership, None, to_centroid)

    return intra_dist, inter_dist, beta_cv, total_cost