Exemple #1
0
def kmeans_select_best(features,
                       ks,
                       repeats=1,
                       method='AIC',
                       R=None,
                       **kwargs):
    '''
    assignments_centroids = kmeans_select_best(features, ks, repeats=1, method='AIC', R=None, **kwargs)

    Perform ``repeats`` calls to ``kmeans`` for each ``k`` in ``ks``, select
    the best one according to ``method.``

    Note that, unlike a raw ``kmeans`` call, this is *always deterministic*
    even if ``R=None`` (which is interpreted as being equivalent to setting it
    to a fixed value). Otherwise, the jug paradigm would be broken as different
    runs would give different results.

    Parameters
    ----------
    features : array-like
        2D array
    ks : sequence of integers
        These will be the values of ``k`` to try
    repeats : integer, optional
        How many times to attempt each k (default: 1).
    method : str, optional
        Which method to use. Must be one of 'AIC' (default) or 'BIC'.
    R : random number source, optional
        Even you do not pass a value, the result will be deterministic. This is
        different from the typical behaviour of ``R``, but, when using jug,
        reproducibility is often but, when using jug, reproducibility is often
        a desired feature.
    kwargs : other options
        These are passed transparently to ``kmeans``

    Returns
    -------
    assignments_centroids : jug.Task
        jug.Task which is the result of the best (as measured by ``method``)
        kmeans clustering.
    '''
    from milk import kmeans
    from milk.utils import get_pyrandom
    kmeans = TaskGenerator(kmeans)
    if R is not None:
        start = get_pyrandom(R).randint(0, 1024 * 1024)
    else:
        start = 7
    results = []
    for ki, k in enumerate(ks):
        for i in xrange(repeats):
            results.append(
                kmeans(features, k, R=(start + 7 * repeats * ki + i),
                       **kwargs))
    return _select_best(features, results, method)[1]
Exemple #2
0
def kmeans_select_best(features, ks, repeats=1, method='AIC', R=None, **kwargs):
    '''
    assignments_centroids = kmeans_select_best(features, ks, repeats=1, method='AIC', R=None, **kwargs)

    Perform ``repeats`` calls to ``kmeans`` for each ``k`` in ``ks``, select
    the best one according to ``method.``

    Note that, unlike a raw ``kmeans`` call, this is *always deterministic*
    even if ``R=None`` (which is interpreted as being equivalent to setting it
    to a fixed value). Otherwise, the jug paradigm would be broken as different
    runs would give different results.

    Parameters
    ----------
    features : array-like
        2D array
    ks : sequence of integers
        These will be the values of ``k`` to try
    repeats : integer, optional
        How many times to attempt each k (default: 1).
    method : str, optional
        Which method to use. Must be one of 'AIC' (default) or 'BIC'.
    R : random number source, optional
        Even you do not pass a value, the result will be deterministic. This is
        different from the typical behaviour of ``R``, but, when using jug,
        reproducibility is often but, when using jug, reproducibility is often
        a desired feature.
    kwargs : other options
        These are passed transparently to ``kmeans``

    Returns
    -------
    assignments_centroids : jug.Task
        jug.Task which is the result of the best (as measured by ``method``)
        kmeans clustering.
    '''
    from milk import kmeans
    from milk.utils import get_pyrandom
    kmeans = TaskGenerator(kmeans)
    if R is not None:
        start = get_pyrandom(R).randint(0,1024*1024)
    else:
        start = 7
    results = []
    for ki,k in enumerate(ks):
        for i in xrange(repeats):
            results.append(kmeans(features, k, R=(start+7*repeats*ki+i), **kwargs))
    return _select_best(features, results, method)[1]
Exemple #3
0
def kmeans_select_best(features, ks, repeats=1, method='AIC', R=None, **kwargs):
    '''
    assignments_centroids = kmeans_select_best(features, ks, repeats=1, method='AIC', R=None, **kwargs)

    Perform ``repeats`` calls to ``kmeans`` for each ``k`` in ``ks``, select
    the best one according to ``method.``

    Note that, unlike a raw ``kmeans`` call, this is *always deterministic*
    even if ``R=None``.

    Parameters
    ----------
    features : array-like
        2D array
    ks : sequence of integers
        These will be the values of ``k`` to try
    repeats : integer, optional
        How many times to attempt each k (default: 1).
    method : str, optional
        Which method to use. Must be one of 'AIC' (default) or 'BIC'.
    R : random number source, optional
        If you do not pass a value, the result will be deterministic
    kwargs : other options
        These are passed transparently to ``kmeans``

    Returns
    -------
    assignments_centroids : jug.Task
        jug.Task which is the result of the best (as measured by ``method``)
        kmeans clustering.
    '''
    from milk import kmeans
    from milk.utils import get_pyrandom
    kmeans = TaskGenerator(kmeans)
    if R is not None:
        start = get_pyrandom(R).randint(0,1024*1024)
    else:
        start = 7
    results = []
    for ki,k in enumerate(ks):
        for i in xrange(repeats):
            results.append(kmeans(features, k, R=(start+7*repeats*ki+i), **kwargs))
    return _select_best(features, results, method)