Exemple #1
0
    def __init__(
        self,
        initial_means,
        priors=None,
        covariance_matrices=None,
        conv_threshold=1e-6,
        bias=0.1,
        normalise=False,
        svd_dimensions=None,
    ):
        """
        Build an EM clusterer from its starting parameters, convergence
        threshold and vector pre-processing options.

        :param  initial_means: the means of the gaussian cluster centers
        :type   initial_means: [seq of] numpy array or seq of SparseArray
        :param  priors: the prior probability for each cluster
        :type   priors: numpy array or seq of float
        :param  covariance_matrices: the covariance matrix for each cluster
        :type   covariance_matrices: [seq of] numpy array
        :param  conv_threshold: maximum change in likelihood before deemed
                    convergent
        :type   conv_threshold: int or float
        :param  bias: variance bias used to ensure non-singular covariance
                      matrices
        :type   bias: float
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param  svd_dimensions: number of dimensions to use in reducing
                                vector dimensionality with SVD
        :type   svd_dimensions: int
        """
        # Vector normalisation / SVD settings are handled by the base class.
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        # The means are copied into a float64 array; one cluster per mean.
        self._means = numpy.array(initial_means, dtype=numpy.float64)
        self._num_clusters = len(initial_means)

        # Optional starting parameters are stored exactly as supplied
        # (possibly None).
        self._priors = priors
        self._covariance_matrices = covariance_matrices

        # Numerical settings.
        self._bias = bias
        self._conv_threshold = conv_threshold
Exemple #2
0
    def __init__(self, initial_means, priors=None,
                 covariance_matrices=None, conv_threshold=1e-6,
                 bias=0.1, normalise=False, svd_dimensions=None):
        """
        Initialise an EM clusterer with the supplied starting parameters,
        convergence threshold and vector pre-processing options.

        :param  initial_means: the means of the gaussian cluster centers
        :type   initial_means: [seq of] numpy array or seq of SparseArray
        :param  priors: the prior probability for each cluster
        :type   priors: numpy array or seq of float
        :param  covariance_matrices: the covariance matrix for each cluster
        :type   covariance_matrices: [seq of] numpy array
        :param  conv_threshold: maximum change in likelihood before deemed
                    convergent
        :type   conv_threshold: int or float
        :param  bias: variance bias used to ensure non-singular covariance
                      matrices
        :type   bias: float
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param  svd_dimensions: number of dimensions to use in reducing
                                vector dimensionality with SVD
        :type   svd_dimensions: int
        """
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        # Cluster centres as a float64 array; their count fixes the number
        # of clusters.
        means_array = numpy.array(initial_means, numpy.float64)
        self._means = means_array
        self._num_clusters = len(initial_means)

        # Remaining settings are kept verbatim; priors and covariance
        # matrices may be None here.
        self._bias = bias
        self._priors = priors
        self._covariance_matrices = covariance_matrices
        self._conv_threshold = conv_threshold
Exemple #3
0
 def __init__(
     self,
     num_means,
     distance,
     repeats=1,
     conv_test=1e-6,
     initial_means=None,
     normalise=False,
     svd_dimensions=None,
     rng=None,
 ):
     """
     Configure a k-means clusterer.

     Explicit initial means must number exactly ``num_means`` and cannot be
     combined with more than one repeat.

     :param  num_means:  the number of means to use (may use fewer)
     :type   num_means:  int
     :param  distance:   measure of distance between two vectors
     :type   distance:   function taking two vectors and returning a float
     :param  repeats:    number of randomised clustering trials to use
     :type   repeats:    int
     :param  conv_test:  maximum variation in mean differences before
                         deemed convergent
     :type   conv_test:  number
     :param  initial_means: set of k initial means
     :type   initial_means: sequence of vectors
     :param  normalise:  should vectors be normalised to length 1
     :type   normalise:  boolean
     :param  svd_dimensions: number of dimensions to use in reducing vector
                             dimensionality with SVD
     :type   svd_dimensions: int
     :param  rng:        random number generator (or None)
     :type   rng:        Random
     """
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

     self._max_difference = conv_test
     self._distance = distance
     self._num_means = num_means

     # When starting means are given there must be exactly num_means of them.
     assert (len(initial_means) == num_means) if initial_means else True
     self._means = initial_means

     # At least one trial, and only a single trial when the starting means
     # are fixed by the caller.
     assert repeats >= 1
     assert not initial_means or not repeats > 1
     self._repeats = repeats

     # Use the caller's generator when given, otherwise a fresh one.
     self._rng = rng or random.Random()
Exemple #4
0
    def __init__(self, num_means, distance, repeats=1, conv_test=1e-6,
                 initial_means=None, normalise=False, svd_dimensions=None,
                 rng=None, avoid_empty_clusters=False):
        """
        Configure a k-means clusterer.

        Explicit initial means must number exactly ``num_means`` and cannot
        be combined with more than one repeat.

        :param  num_means:  the number of means to use (may use fewer)
        :type   num_means:  int
        :param  distance:   measure of distance between two vectors
        :type   distance:   function taking two vectors and returning a float
        :param  repeats:    number of randomised clustering trials to use
        :type   repeats:    int
        :param  conv_test:  maximum variation in mean differences before
                            deemed convergent
        :type   conv_test:  number
        :param  initial_means: set of k initial means
        :type   initial_means: sequence of vectors
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param  svd_dimensions: number of dimensions to use in reducing
                                vector dimensionality with SVD
        :type   svd_dimensions: int
        :param  rng:        random number generator (or None)
        :type   rng:        Random
        :param  avoid_empty_clusters: include current centroid in computation
                                      of next one; avoids undefined behavior
                                      when clusters become empty
        :type   avoid_empty_clusters: boolean
        """
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        self._distance = distance
        self._num_means = num_means
        self._max_difference = conv_test

        # When starting means are given there must be exactly num_means of
        # them.
        assert (len(initial_means) == num_means) if initial_means else True
        self._means = initial_means

        # At least one trial, and only a single trial when the starting
        # means are fixed by the caller.
        assert repeats >= 1
        assert not initial_means or not repeats > 1
        self._repeats = repeats

        # Use the caller's generator when given, otherwise a fresh one.
        if not rng:
            rng = random.Random()
        self._rng = rng

        self._avoid_empty_clusters = avoid_empty_clusters
Exemple #5
0
 def cluster(self, vectors, assign_clusters=False, trace=False):
     """
     Reset the dendrogram for the given vectors, then delegate the
     clustering itself to ``VectorSpaceClusterer.cluster``.

     :param  vectors: the vectors to cluster
     :param  assign_clusters: passed through to the base implementation
     :param  trace: passed through to the base implementation
     :return: whatever ``VectorSpaceClusterer.cluster`` returns
     """
     # The dendrogram stores the merge order; it starts from a float64
     # copy of every input vector.
     leaves = []
     for vector in vectors:
         leaves.append(numpy.array(vector, numpy.float64))
     self._dendrogram = Dendrogram(leaves)

     return VectorSpaceClusterer.cluster(self, vectors, assign_clusters, trace)
Exemple #6
0
 def __init__(self, num_clusters=1, normalise=True, svd_dimensions=None):
     """
     Set up the clusterer with no dendrogram and no group values yet.

     :param  num_clusters: stored as the clusterer's number of clusters
     :type   num_clusters: int
     :param  normalise: forwarded to ``VectorSpaceClusterer.__init__``
     :type   normalise: boolean
     :param  svd_dimensions: forwarded to ``VectorSpaceClusterer.__init__``
     :type   svd_dimensions: int
     """
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

     # Both start unset (None) at construction time.
     self._groups_values = None
     self._dendrogram = None

     self._num_clusters = num_clusters
Exemple #7
0
 def cluster(self, vectors, assign_clusters=False, trace=False):
     """
     Build a fresh dendrogram over the input vectors and hand the actual
     clustering to ``VectorSpaceClusterer.cluster``.

     :param  vectors: the vectors to cluster
     :param  assign_clusters: passed through to the base implementation
     :param  trace: passed through to the base implementation
     :return: whatever ``VectorSpaceClusterer.cluster`` returns
     """
     # Each vector is copied to float64 and becomes one initial entry of
     # the dendrogram, which stores the merge order.
     float_vectors = [
         numpy.array(vector, dtype=numpy.float64) for vector in vectors
     ]
     self._dendrogram = Dendrogram(float_vectors)

     result = VectorSpaceClusterer.cluster(
         self, vectors, assign_clusters, trace
     )
     return result
Exemple #8
0
 def __init__(self, num_clusters=1, normalise=True, svd_dimensions=None):
     """
     Initialise the clusterer; the dendrogram and group values start
     as None.

     :param  num_clusters: stored as the clusterer's number of clusters
     :type   num_clusters: int
     :param  normalise: forwarded to ``VectorSpaceClusterer.__init__``
     :type   normalise: boolean
     :param  svd_dimensions: forwarded to ``VectorSpaceClusterer.__init__``
     :type   svd_dimensions: int
     """
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

     self._num_clusters = num_clusters

     # Placeholders only; nothing is computed at construction time.
     self._dendrogram = self._groups_values = None