Esempio n. 1
0
    def __init__(
        self,
        initial_means,
        priors=None,
        covariance_matrices=None,
        conv_threshold=1e-6,
        bias=0.1,
        normalise=False,
        svd_dimensions=None
    ):
        """
        Create an EM clusterer from the given starting parameters,
        convergence threshold and vector mangling parameters.

        :param  initial_means: the means of the gaussian cluster centers;
                    converted to a float64 numpy array before being stored
        :type   initial_means: [seq of] numpy array or seq of SparseArray
        :param  priors: the prior probability for each cluster
        :type   priors: numpy array or seq of float
        :param  covariance_matrices: the covariance matrix for each cluster
        :type   covariance_matrices: [seq of] numpy array
        :param  conv_threshold: maximum change in likelihood before deemed
                    convergent
        :type   conv_threshold: int or float
        :param  bias: variance bias used to ensure non-singular covariance
                    matrices
        :type   bias: float
        :param  normalise: should vectors be normalised to length 1
        :type   normalise: boolean
        :param  svd_dimensions: number of dimensions to use in reducing
                    vector dimensionality with SVD
        :type   svd_dimensions: int
        """
        # The base class receives the vector mangling options.
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        # The number of clusters is taken from the number of starting means.
        self._means = numpy.array(initial_means, dtype=numpy.float64)
        self._num_clusters = len(initial_means)

        # Remaining settings are stored exactly as supplied (None included).
        self._conv_threshold = conv_threshold
        self._covariance_matrices = covariance_matrices
        self._priors = priors
        self._bias = bias
Esempio n. 2
0
 def __init__(self, num_means, distance, repeats=1, conv_test=1e-6,
              initial_means=None, normalise=False, svd_dimensions=None,
              rng=None, avoid_empty_clusters=False):
     """
     Record the clustering configuration and check that the starting
     means, if any, are consistent with the other settings.

     :param  num_means: the number of means to use (may use fewer)
     :type   num_means: int
     :param  distance: measure of distance between two vectors
     :type   distance: function taking two vectors and returning a float
     :param  repeats: number of randomised clustering trials to use;
                      must be at least 1, and may not exceed 1 when
                      initial_means is given
     :type   repeats: int
     :param  conv_test: maximum variation in mean differences before
                        deemed convergent
     :type   conv_test: number
     :param  initial_means: set of k initial means; when given, its length
                            must equal num_means
     :type   initial_means: sequence of vectors
     :param  normalise: should vectors be normalised to length 1
     :type   normalise: boolean
     :param  svd_dimensions: number of dimensions to use in reducing vector
                             dimensionality with SVD
     :type   svd_dimensions: int
     :param  rng: random number generator (or None, in which case a new
                  random.Random() is created)
     :type   rng: Random
     :param  avoid_empty_clusters: include current centroid in computation
                                   of next one; avoids undefined behavior
                                   when clusters become empty
     :type   avoid_empty_clusters: boolean
     """
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)
     self._num_means = num_means
     self._distance = distance
     self._max_difference = conv_test

     # Supplied starting means must number exactly num_means.
     assert not (initial_means and len(initial_means) != num_means)
     self._means = initial_means

     # At least one trial is required, and only a single trial is allowed
     # when starting means were supplied.
     assert repeats >= 1
     assert not initial_means or not (repeats > 1)
     self._repeats = repeats

     # Use the caller's generator when one is supplied.
     self._rng = rng or random.Random()
     self._avoid_empty_clusters = avoid_empty_clusters
Esempio n. 3
0
    def __init__(
        self,
        num_means,
        distance,
        repeats=1,
        conv_test=1e-6,
        initial_means=None,
        normalise=False,
        svd_dimensions=None,
        rng=None,
        avoid_empty_clusters=False
    ):
        """
        Store the clusterer settings after checking that any starting means
        agree with the requested number of means and trials.

        :param  num_means:  the number of means to use (may use fewer)
        :type   num_means:  int
        :param  distance:   measure of distance between two vectors
        :type   distance:   function taking two vectors and returning a float
        :param  repeats:    number of randomised clustering trials to use
                            (at least 1; exactly 1 if initial_means is given)
        :type   repeats:    int
        :param  conv_test:  maximum variation in mean differences before
                            deemed convergent
        :type   conv_test:  number
        :param  initial_means: set of k initial means; its length must match
                               num_means when provided
        :type   initial_means: sequence of vectors
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param  svd_dimensions: number of dimensions to use in reducing
                                vector dimensionality with SVD
        :type   svd_dimensions: int
        :param  rng:        random number generator; a fresh random.Random()
                            is used when None
        :type   rng:        Random
        :param  avoid_empty_clusters: include current centroid in computation
                                      of next one; avoids undefined behavior
                                      when clusters become empty
        :type   avoid_empty_clusters: boolean
        """
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        self._num_means = num_means
        self._distance = distance
        self._max_difference = conv_test

        # When starting means are given there must be num_means of them.
        assert not (initial_means and len(initial_means) != num_means)
        self._means = initial_means

        # There must be at least one trial; more than one trial is not
        # permitted together with explicit starting means.
        assert repeats >= 1
        assert not initial_means or not (repeats > 1)
        self._repeats = repeats

        # Fall back to a new generator when none was passed in.
        self._rng = rng if rng else random.Random()
        self._avoid_empty_clusters = avoid_empty_clusters
Esempio n. 4
0
    def __init__(self, initial_means, priors=None, covariance_matrices=None,
                 conv_threshold=1e-6, bias=0.1, normalise=False,
                 svd_dimensions=None):
        """
        Build an EM clusterer with the supplied starting parameters,
        convergence threshold and vector mangling parameters.

        :param  initial_means: the means of the gaussian cluster centers
                    (stored internally as a float64 numpy array)
        :type   initial_means: [seq of] numpy array or seq of SparseArray
        :param  priors: the prior probability for each cluster
        :type   priors: numpy array or seq of float
        :param  covariance_matrices: the covariance matrix for each cluster
        :type   covariance_matrices: [seq of] numpy array
        :param  conv_threshold: maximum change in likelihood before deemed
                    convergent
        :type   conv_threshold: int or float
        :param  bias: variance bias used to ensure non-singular covariance
                    matrices
        :type   bias: float
        :param  normalise: should vectors be normalised to length 1
        :type   normalise: boolean
        :param  svd_dimensions: number of dimensions to use in reducing
                    vector dimensionality with SVD
        :type   svd_dimensions: int
        """
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        # Convert the starting means to a float64 array before storing them.
        means_array = numpy.array(initial_means, dtype=numpy.float64)
        self._means = means_array

        # One cluster per starting mean.
        self._num_clusters = len(initial_means)

        # Everything else is kept as passed in.
        self._conv_threshold = conv_threshold
        self._covariance_matrices = covariance_matrices
        self._priors = priors
        self._bias = bias
Esempio n. 5
0
 def __init__(self,
              num_clusters=None,
              normalise=True,
              svd_dimensions=None):
     """
     Initialise the clusterer's bookkeeping attributes.

     :param  num_clusters: stored as-is on the instance (may be None)
     :param  normalise: forwarded to VectorSpaceClusterer.__init__
     :type   normalise: boolean
     :param  svd_dimensions: forwarded to VectorSpaceClusterer.__init__
     """
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)
     self._num_clusters = num_clusters
     # No group values yet; the distance map starts out empty.
     self._groups_values = None
     self._distMap = dict()
Esempio n. 6
0
 def __init__(
     self, num_clusters=None, normalise=True, svd_dimensions=None
 ):
     """
     Set up the clusterer with empty working state.

     :param  num_clusters: kept unchanged on the instance (None allowed)
     :param  normalise: passed through to VectorSpaceClusterer.__init__
     :type   normalise: boolean
     :param  svd_dimensions: passed through to VectorSpaceClusterer.__init__
     """
     # The base class handles the normalisation / SVD options.
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

     self._num_clusters = num_clusters

     # Working state: group values are unset and the distance map is empty.
     self._groups_values = None
     self._distMap = dict()