Example #1
    def _localReadMoreXML(self, xmlNode):
        """
      Method that reads the portion of the xml input that belongs to this specialized class
      and initializes internal parameters
      @ In, xmlNode, xml.etree.Element, Xml element node
      @ Out, None
    """
        self.distParams = {}
        for child in xmlNode:
            if child.tag == 'metricType':
                self.metricType = child.text
            else:
                self.distParams[str(child.tag)] = utils.tryParse(child.text)
        # dict_keys views cannot be concatenated with '+', so build plain lists first
        availableMetrics = list(pairwise.kernel_metrics()) + \
            list(pairwise.distance_metrics()) + list(scores)
        if self.metricType not in availableMetrics:
            metricList = ', '.join(
                availableMetrics[:-1]) + ', or ' + availableMetrics[-1]
            self.raiseAnError(
                IOError,
                'Metric SKL error: metricType ' + str(self.metricType) +
                ' is not available. Available metrics are: ' + metricList +
                '.')

        for key, value in self.distParams.items():
            try:
                newValue = ast.literal_eval(value)
                if isinstance(newValue, list):
                    newValue = np.asarray(newValue)
                self.distParams[key] = newValue
            except (ValueError, SyntaxError, TypeError):
                # not a parsable Python literal; keep the value as-is
                self.distParams[key] = value
Example #2
def test_nystroem_approximation():
    # some basic tests
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))

    # With n_components = n_samples this is exact
    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    trans = Nystroem(n_components=2, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 2))

    # test callable kernel
    linear_kernel = lambda X, Y: np.dot(X, Y.T)
    trans = Nystroem(n_components=2, kernel=linear_kernel, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 2))

    # test that available kernels fit and transform
    kernels_available = kernel_metrics()
    for kern in kernels_available:
        trans = Nystroem(n_components=2, kernel=kern, random_state=rnd)
        X_transformed = trans.fit(X).transform(X)
        assert_equal(X_transformed.shape, (X.shape[0], 2))
Example #3
def test_affinities():
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20,
                      random_state=0,
                      centers=[[1, 1], [-1, -1]],
                      cluster_std=0.01)
    # nearest neighbors affinity
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        sp = SpectralClustering(n_clusters=2,
                                affinity='nearest_neighbors',
                                random_state=0)
        labels = sp.fit(X).labels_
        assert_equal(adjusted_rand_score(y, labels), 1)
    assert_true(
        re.search(r'\bnot fully connected\b', str(warning_list[0].message)))

    sp = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    labels = sp.fit(X).labels_
    assert_equal(adjusted_rand_score(y, labels), 1)

    X = check_random_state(10).rand(10, 5) * 10

    kernels_available = kernel_metrics()
    for kern in kernels_available:
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern != 'additive_chi2':
            sp = SpectralClustering(n_clusters=2,
                                    affinity=kern,
                                    random_state=0)
            labels = sp.fit(X).labels_
            assert_equal((X.shape[0], ), labels.shape)

    sp = SpectralClustering(n_clusters=2,
                            affinity=lambda x, y: 1,
                            random_state=0)
    labels = sp.fit(X).labels_
    assert_equal((X.shape[0], ), labels.shape)

    def histogram(x, y, **kwargs):
        """Histogram kernel implemented as a callable."""
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
    labels = sp.fit(X).labels_
    assert_equal((X.shape[0], ), labels.shape)

    # raise error on unknown affinity
    sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, sp.fit, X)
Example #4
  def distance(self, x, y=None, **kwargs):
      """
        This method returns the distance between two points x and y. If y is not provided then x is a pointSet and a distance matrix is returned
        @ In, x, dict, dictionary containing data of x
        @ In, y, dict, dictionary containing data of y
        @ Out, value, float or numpy.ndarray, distance between x and y (if y is provided) or a square distance matrix if y is None
      """
     if y is not None:
         if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
             dictTemp = utils.mergeDictionaries(kwargs, self.distParams)
             if self.metricType in pairwise.kernel_metrics().keys():
                  # kernel_metrics() only maps names to functions; pairwise_kernels
                  # is the call that actually evaluates the chosen kernel
                  value = pairwise.pairwise_kernels(X=x,
                                                    Y=y,
                                                    metric=self.metricType,
                                                    **dictTemp)
             elif self.metricType in pairwise.distance_metrics():
                 value = pairwise.pairwise_distances(X=x,
                                                     Y=y,
                                                     metric=self.metricType,
                                                     **dictTemp)
             return value
         else:
             self.raiseAnError(
                 IOError,
                 'Metric SKL error: SKL metrics support only PointSets and not HistorySets'
             )
     else:
         if self.metricType == 'mahalanobis':
              covMatrix = np.cov(x.T)
              kwargs['VI'] = np.linalg.inv(covMatrix)
         dictTemp = utils.mergeDictionaries(kwargs, self.distParams)
         if self.metricType in pairwise.kernel_metrics().keys():
             value = pairwise.pairwise_kernels(X=x,
                                               metric=self.metricType,
                                               **dictTemp)
         elif self.metricType in pairwise.distance_metrics().keys():
             value = pairwise.pairwise_distances(X=x,
                                                 metric=self.metricType,
                                                 **dictTemp)
         return value
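For the mahalanobis branch above, sklearn expects the inverse covariance matrix through the VI keyword, which pairwise_distances forwards to SciPy. Below is a minimal, self-contained sketch of that call, independent of the framework helpers (utils, raiseAnError) used in the method; the data is random and purely illustrative.

import numpy as np
from sklearn.metrics import pairwise

rng = np.random.RandomState(0)
X = rng.rand(20, 3)                      # a small point set: 20 points, 3 features

# Inverse covariance of the point set, passed as 'VI' for the mahalanobis metric
VI = np.linalg.inv(np.cov(X.T))
D = pairwise.pairwise_distances(X, metric='mahalanobis', VI=VI)
print(D.shape)                           # (20, 20) square distance matrix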
Example #5
def test_affinities():
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20,
                      random_state=0,
                      centers=[[1, 1], [-1, -1]],
                      cluster_std=0.01)
    # nearest neighbors affinity
    sp = SpectralClustering(n_clusters=2,
                            affinity="nearest_neighbors",
                            random_state=0)
    with pytest.warns(UserWarning, match="not fully connected"):
        sp.fit(X)
    assert adjusted_rand_score(y, sp.labels_) == 1

    sp = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    labels = sp.fit(X).labels_
    assert adjusted_rand_score(y, labels) == 1

    X = check_random_state(10).rand(10, 5) * 10

    kernels_available = kernel_metrics()
    for kern in kernels_available:
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern != "additive_chi2":
            sp = SpectralClustering(n_clusters=2,
                                    affinity=kern,
                                    random_state=0)
            labels = sp.fit(X).labels_
            assert (X.shape[0], ) == labels.shape

    sp = SpectralClustering(n_clusters=2,
                            affinity=lambda x, y: 1,
                            random_state=0)
    labels = sp.fit(X).labels_
    assert (X.shape[0], ) == labels.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
    labels = sp.fit(X).labels_
    assert (X.shape[0], ) == labels.shape

    # raise error on unknown affinity
    sp = SpectralClustering(n_clusters=2, affinity="<unknown>")
    with pytest.raises(ValueError):
        sp.fit(X)
Example #6
def edge_weight(x, y, mode='rbf', gamma=0.5):
    dists = distance_metrics()
    kernels = kernel_metrics()
    kernels['bhattacharya'] = bhattacharya
    kernels['intersection'] = intersection
    if mode in dists:
        diff = dists[mode](x, y)
    elif mode in kernels:
        diff = kernels[mode](x, y, gamma=gamma)
    else:
        raise ValueError('Mode not recognised')

    return np.float64(diff)
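A short usage sketch of the name-based lookup edge_weight relies on; only sklearn is needed here, since bhattacharya and intersection are custom kernels supplied by the surrounding module and are not reproduced.

import numpy as np
from sklearn.metrics.pairwise import distance_metrics, kernel_metrics

x = np.array([[0.2, 0.5, 0.3]])          # two small feature vectors as 2D rows
y = np.array([[0.1, 0.6, 0.3]])

print(distance_metrics()['euclidean'](x, y))      # plain distance
print(kernel_metrics()['rbf'](x, y, gamma=0.5))   # RBF similarity, as in mode='rbf'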
Example #7
 def distance(self, x, y=None, **kwargs):
   """
     This method returns the distance between two points x and y. If y is not provided then x is a pointSet and a distance matrix is returned
     @ In, x, numpy.ndarray, array containing data of x, if 1D array is provided, the array will be reshaped via x.reshape(1,-1)
     @ In, y, numpy.ndarray, array containing data of y, if 1D array is provided, the array will be reshaped via y.reshape(1,-1)
     @ Out, value, numpy.ndarray, distance between x and y (if y is provided) or a square distance matrix if y is None
   """
   if y is not None:
     if isinstance(x,np.ndarray) and isinstance(y,np.ndarray):
       if len(x.shape) == 1:
         x = x.reshape(1,-1)
         #self.raiseAWarning(self, "1D array is provided. For consistence, this array is reshaped via x.reshape(1,-1) ")
       if len(y.shape) == 1:
         y = y.reshape(1,-1)
         #self.raiseAWarning(self, "1D array is provided. For consistence, this array is reshaped via y.reshape(1,-1) ")
       dictTemp = utils.mergeDictionaries(kwargs,self.distParams)
       if self.metricType in pairwise.kernel_metrics().keys():
         value = pairwise.pairwise_kernels(X=x, Y=y, metric=self.metricType, **dictTemp)
       elif self.metricType in pairwise.distance_metrics():
         value = pairwise.pairwise_distances(X=x, Y=y, metric=self.metricType, **dictTemp)
       if value.shape == (1,1):
         return value[0]
       else:
         return value
     else:
       self.raiseAnError(IOError,'Metric SKL error: SKL metrics support only PointSets and not HistorySets')
   else:
     if self.metricType == 'mahalanobis':
        covMatrix = np.cov(x.T)
        kwargs['VI'] = np.linalg.inv(covMatrix)
     dictTemp = utils.mergeDictionaries(kwargs,self.distParams)
     if self.metricType in pairwise.kernel_metrics().keys():
       value = pairwise.pairwise_kernels(X=x, metric=self.metricType, **dictTemp)
     elif self.metricType in pairwise.distance_metrics().keys():
       value = pairwise.pairwise_distances(X=x, metric=self.metricType, **dictTemp)
     if value.shape == (1,1):
       return value[0]
     else:
       return value
Example #8
    def __init__(self, kernel, kernel_params=None, n_jobs=1):

        self.kernel = kernel
        self.n_jobs = n_jobs
        if self.kernel == 'mallow':
            self.kernel_ = mallow_kernel_wrapper(self.n_jobs)
        else:
            self.kernel_ = kernel_metrics()[kernel]
        # avoid the mutable-default pitfall: fall back to a fresh empty dict
        self.kernel_params_ = kernel_params if kernel_params is not None else {}
        self._source_data = None
        self._target_data = None
        self._data = {}
        self.center = False

        self._empty_kernel_values()
Example #9
def test_affinities():
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01
                      )
    # nearest neighbors affinity
    sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                            random_state=0)
    assert_warns_message(UserWarning, 'not fully connected', sp.fit, X)
    assert_equal(adjusted_rand_score(y, sp.labels_), 1)

    sp = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    labels = sp.fit(X).labels_
    assert_equal(adjusted_rand_score(y, labels), 1)

    X = check_random_state(10).rand(10, 5) * 10

    kernels_available = kernel_metrics()
    for kern in kernels_available:
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern != 'additive_chi2':
            sp = SpectralClustering(n_clusters=2, affinity=kern,
                                    random_state=0)
            labels = sp.fit(X).labels_
            assert_equal((X.shape[0],), labels.shape)

    sp = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
                            random_state=0)
    labels = sp.fit(X).labels_
    assert_equal((X.shape[0],), labels.shape)

    def histogram(x, y, **kwargs):
        """Histogram kernel implemented as a callable."""
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
    labels = sp.fit(X).labels_
    assert_equal((X.shape[0],), labels.shape)

    # raise error on unknown affinity
    sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, sp.fit, X)
Example #10
def krx(X, target=None, target2=None, metric="rbf", K_b_inv=None):
    r'''Computes Kernelized RX anomaly detector scores.

    Usage:

        y = krx(X [, target=t, metric='rbf'])

    The RX anomaly detector produces a detection statistic equal to the 
    squared Mahalanobis distance of a spectrum from a background distribution
    according to

    .. math::

        y=(x-\mu_b)^T\Sigma^{-1}(x-\mu_b)

    where `x` is the pixel spectrum, :math:`\mu_b` is the background
    mean, and :math:`\Sigma` is the background covariance.

    Arguments:

        `X` (numpy.ndarray):

            For the first calling method shown, `X` can be an image with
            shape (R, C, B) or an ndarray of shape (R * C, B). If the
            `background` keyword is given, it will be used for the image
            background statistics; otherwise, background statistics will be
            computed from `X`.

    Returns numpy.ndarray:

        The return value will be the RX detector score (squared Mahalanobis
        distance) for each pixel given.  If `X` has shape (R, C, B), the
        returned ndarray will have shape (R, C).
    
    References:

    Reed, I.S. and Yu, X., "Adaptive multiple-band CFAR detection of an optical
    pattern with unknown spectral distribution," IEEE Trans. Acoust.,
    Speech, Signal Processing, vol. 38, pp. 1760-1770, Oct. 1990.
    '''
    #TODO SOS: update this block
    if metric not in kernel_metrics():
        raise ValueError('`%s` is not a supported metric.' % metric)


    return KRX(target=target, target2=target2, metric=metric, K_b_inv=K_b_inv)(X)
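As a reference point for the formula quoted in the docstring, here is a minimal NumPy sketch of the classical (non-kernelized) RX statistic with background statistics estimated from the data itself; it only illustrates the math and is not the KRX implementation the function above delegates to.

import numpy as np

def rx_scores(X):
    # Squared Mahalanobis distance of each row of X from the data mean
    mu = X.mean(axis=0)
    cov_inv = np.linalg.inv(np.cov(X, rowvar=False))
    diff = X - mu
    # y_i = (x_i - mu)^T Sigma^{-1} (x_i - mu), evaluated for every row at once
    return np.einsum('ij,jk,ik->i', diff, cov_inv, diff)

rng = np.random.RandomState(0)
pixels = rng.normal(size=(100, 5))       # 100 spectra with 5 bands
print(rx_scores(pixels).shape)           # (100,)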
Example #11
_METRICS_SCALAR_PAIRWISE = {}

_METRICS_MISC_PAIRWISE = {}
# Update with dict of kernel names and functions.
# >>> kernel_metrics()
# {'additive_chi2': sklearn.metrics.pairwise.additive_chi2_kernel,
#  'chi2': sklearn.metrics.pairwise.chi2_kernel,
#  'linear': sklearn.metrics.pairwise.linear_kernel,
#  'polynomial': sklearn.metrics.pairwise.polynomial_kernel,
#  'poly': sklearn.metrics.pairwise.polynomial_kernel,
#  'rbf': sklearn.metrics.pairwise.rbf_kernel,
#  'laplacian': sklearn.metrics.pairwise.laplacian_kernel,
#  'sigmoid': sklearn.metrics.pairwise.sigmoid_kernel,
#  'cosine': sklearn.metrics.pairwise.cosine_similarity}
# (Last Updated: sklearn.__version__ == 0.19.1)
_METRICS_MISC_PAIRWISE.update(sk_pairwise.kernel_metrics())
# Update with dict of distance names and functions.
# >>> distance_metrics()
# {'cityblock': sklearn.metrics.pairwise.manhattan_distances,  # \/
#  'cosine': sklearn.metrics.pairwise.cosine_distances,
#  'euclidean': sklearn.metrics.pairwise.euclidean_distances,  # \/
#  'l2': sklearn.metrics.pairwise.euclidean_distances,  # /\
#  'l1': sklearn.metrics.pairwise.manhattan_distances,  # \/
#  'manhattan': sklearn.metrics.pairwise.manhattan_distances,  # /\
#  'precomputed': None}
# (Last Updated: sklearn.__version__ == 0.19.1)
_METRICS_MISC_PAIRWISE.update(sk_pairwise.distance_metrics())
# Update with paired distance names (prepend "paired_") and functions.
# >>> {'paired_' + k: v for k, v in
# ...  iteritems(sk_pairwise.PAIRED_DISTANCES.copy())}
# {'paired_cosine': sklearn.metrics.pairwise.paired_cosine_distances,
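The code above merges sklearn's kernel and distance maps into a single name-to-callable registry. Below is a small, self-contained sketch of how such a merged registry can be queried; the _lookup_metric helper is illustrative and not part of the original module.

import numpy as np
from sklearn.metrics import pairwise as sk_pairwise

# Merge kernels and distances into one name -> callable map, mirroring
# _METRICS_MISC_PAIRWISE above.
registry = {}
registry.update(sk_pairwise.kernel_metrics())
registry.update(sk_pairwise.distance_metrics())

def _lookup_metric(name):
    # 'precomputed' maps to None in distance_metrics(), so guard against it
    func = registry.get(name)
    if func is None:
        raise KeyError('no callable registered for %r' % name)
    return func

X = np.arange(6.0).reshape(3, 2)
print(_lookup_metric('rbf')(X).shape)         # (3, 3) kernel matrix
print(_lookup_metric('cityblock')(X).shape)   # (3, 3) distance matrix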
Example #12
  def distance(self, x, y=None, **kwargs):
      """
        This method returns the distance between two points x and y. If y is not provided then x is a pointSet and a distance matrix is returned
        @ In, x, numpy.ndarray, array containing data of x, if 1D array is provided, the array will be reshaped via x.reshape(1,-1)
        @ In, y, numpy.ndarray, array containing data of y, if 1D array is provided, the array will be reshaped via y.reshape(1,-1)
        @ Out, value, numpy.ndarray, distance between x and y (if y is provided) or a square distance matrix if y is None
      """
     if y is not None:
         if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
             if len(x.shape) == 1 and self.metricType not in scores.keys():
                 x = x.reshape(1, -1)
                 #self.raiseAWarning(self, "1D array is provided. For consistence, this array is reshaped via x.reshape(1,-1) ")
             if len(y.shape) == 1 and self.metricType not in scores.keys():
                 y = y.reshape(1, -1)
                 #self.raiseAWarning(self, "1D array is provided. For consistence, this array is reshaped via y.reshape(1,-1) ")
             dictTemp = utils.mergeDictionaries(kwargs, self.distParams)
             try:
                 if self.metricType in pairwise.kernel_metrics().keys():
                     value = pairwise.pairwise_kernels(
                         X=x, Y=y, metric=self.metricType, **dictTemp)
                 elif self.metricType in pairwise.distance_metrics():
                     value = pairwise.pairwise_distances(
                         X=x, Y=y, metric=self.metricType, **dictTemp)
                 elif self.metricType in scores.keys():
                     value = np.zeros((1, 1))
                     value[:, :] = scores[self.metricType](x, y, **dictTemp)
             except TypeError as e:
                 self.raiseAWarning(
                     'There are some unexpected keyword arguments found in Metric with type "',
                     self.metricType, '"!')
                 self.raiseAnError(TypeError, 'Input parameters error:\n',
                                   str(e), '\n')
             if value.shape == (1, 1):
                 return value[0]
             else:
                 return value
         else:
             self.raiseAnError(
                 IOError,
                 'Metric SKL error: SKL metrics support only PointSets and not HistorySets'
             )
     else:
         if self.metricType == 'mahalanobis':
              covMatrix = np.cov(x.T)
              kwargs['VI'] = np.linalg.inv(covMatrix)
         dictTemp = utils.mergeDictionaries(kwargs, self.distParams)
         try:
             if self.metricType in pairwise.kernel_metrics().keys():
                 value = pairwise.pairwise_kernels(X=x,
                                                   metric=self.metricType,
                                                   **dictTemp)
             elif self.metricType in pairwise.distance_metrics().keys():
                 value = pairwise.pairwise_distances(X=x,
                                                     metric=self.metricType,
                                                     **dictTemp)
         except TypeError as e:
             self.raiseAWarning(
                 'There are some unexpected keyword arguments found in Metric with type "',
                 self.metricType, '"!')
             self.raiseAnError(TypeError, 'Input parameters error:\n',
                               str(e), '\n')
         if value.shape == (1, 1):
             return value[0]
         else:
             return value
Example #13
def spectral_clustering(input: dict, output: dict, params: dict,
                        log: list) -> None:
    """ Perform spectral clustering on the input connectivity matrix.

    Parameters
    ----------
    input : dict
        Input files, allowed: {connectivity}
    output : dict
        Output file, allowed: {labels}
    params : dict
        The dict is equivalent to cluster_options in the CBPtools
        documentation on readthedocs.io under the parameters for 'clustering'.
    log : list
        Logging files, allowed: {log}
    """

    # Input, output, params
    connectivity_file = input.get('connectivity')
    labels_file = output.get('labels')
    log_file = log[0]
    n_init = params.get('n_init')
    kernel = params.get('kernel')
    assign_labels = params.get('assign_labels')
    eigen_solver = params.get('eigen_solver')
    n_clusters = params.get('n_clusters')
    gamma = params.get('gamma', None)
    n_neighbors = params.get('n_neighbors', None)
    degree = params.get('degree', None)
    coef0 = params.get('coef0', None)
    eigen_tol = params.get('eigen_tol', None)

    # Set up logging
    logger = get_logger('spectral_clustering', log_file)

    _, ext = os.path.splitext(connectivity_file)
    connectivity = np.load(connectivity_file)

    if ext == '.npz':
        connectivity = connectivity.get('connectivity')

    # If the connectivity file is empty (connectivity could not be computed),
    # create an empty labels file
    if connectivity.size == 0:
        logger.warning('%s is empty, aborting clustering' % connectivity_file)
        np.save(labels_file, np.array([]))
        return

    if isinstance(eigen_tol, str):
        eigen_tol = float(eigen_tol)

    kernels = list(kernel_metrics().keys())
    kernels.extend(['nearest_neighbors', 'precomputed',
                    'precomputed_nearest_neighbors'])
    if kernel not in kernels:
        msg = 'Unknown kernel (affinity): %s' % kernel
        logger.error(msg)
        raise ValueError(msg)

    gamma_kernels = ('rbf', 'polynomial', 'sigmoid', 'laplacian', 'chi2')
    if gamma is None and kernel in gamma_kernels:
        msg = 'Setting gamma to 1./%s (1./n_features)' % connectivity.shape[1]
        logger.warning(msg)
        gamma = 1./connectivity.shape[1]

    kwargs = {'n_clusters': n_clusters, 'n_init': n_init, 'affinity': kernel,
              'assign_labels': assign_labels, 'eigen_solver': eigen_solver,
              'gamma': gamma, 'n_neighbors': n_neighbors, 'degree': degree,
              'coef0': coef0}

    kwargs = {k: v for k, v in kwargs.items() if v is not None}

    debug_msg = str(['%s=%s' % (k, v) for k, v in kwargs.items()])
    debug_msg = debug_msg.strip('[]').replace('\'', '')
    logger.debug('clustering %s with options: %s'
                 % (connectivity_file, debug_msg))

    # Perform spectral clustering on the available tolerances
    try:
        kwargs['eigen_tol'] = eigen_tol
        clustering = SpectralClustering(**kwargs)
        clustering.fit(connectivity)
        labels = clustering.labels_

        if np.unique(labels).size != n_clusters:
            logger.error('%s: %s clusters requested, only %s found'
                         % (labels_file, n_clusters, np.unique(labels).size))
            np.save(labels_file, np.array([]))
            return

        # cluster labels are 0-indexed
        np.save(labels_file, labels)

    except np.linalg.LinAlgError as exc:
        logger.error('%s: %s (try increasing the eigen_tol with arpack '
                     'as eigen_solver)' % (labels_file, exc))
        np.save(labels_file, np.array([]))
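A minimal, self-contained sketch of the core clustering step performed above, with random data standing in for a real connectivity matrix and the CBPtools I/O, parameter handling, and logging omitted; the option values are illustrative only.

import numpy as np
from sklearn.cluster import SpectralClustering

connectivity = np.random.RandomState(0).rand(50, 10)   # stand-in connectivity matrix

options = {'n_clusters': 2, 'n_init': 10, 'affinity': 'rbf',
           'assign_labels': 'kmeans', 'eigen_solver': None,
           'gamma': 1. / connectivity.shape[1]}
# Drop unset options so SpectralClustering falls back to its own defaults
options = {k: v for k, v in options.items() if v is not None}

labels = SpectralClustering(**options).fit(connectivity).labels_
print(labels.shape)   # one 0-indexed cluster label per row of the matrix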