def __init__(self, initial_means, priors=None, covariance_matrices=None, conv_threshold=1e-6, bias=0.1, normalise=False, svd_dimensions=None): """ Creates an EM clusterer with the given starting parameters, convergence threshold and vector mangling parameters. :param initial_means: the means of the gaussian cluster centers :type initial_means: [seq of] numpy array or seq of SparseArray :param priors: the prior probability for each cluster :type priors: numpy array or seq of float :param covariance_matrices: the covariance matrix for each cluster :type covariance_matrices: [seq of] numpy array :param conv_threshold: maximum change in likelihood before deemed convergent :type conv_threshold: int or float :param bias: variance bias used to ensure non-singular covariance matrices :type bias: float :param normalise: should vectors be normalised to length 1 :type normalise: boolean :param svd_dimensions: number of dimensions to use in reducing vector dimensionsionality with SVD :type svd_dimensions: int """ VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) self._means = numpy.array(initial_means, numpy.float64) self._num_clusters = len(initial_means) self._conv_threshold = conv_threshold self._covariance_matrices = covariance_matrices self._priors = priors self._bias = bias
def __init__(self, num_means, distance, repeats=1, conv_test=1e-6, initial_means=None, normalise=False, svd_dimensions=None, rng=None): """ :param num_means: the number of means to use (may use fewer) :type num_means: int :param distance: measure of distance between two vectors :type distance: function taking two vectors and returing a float :param repeats: number of randomised clustering trials to use :type repeats: int :param conv_test: maximum variation in mean differences before deemed convergent :type conv_test: number :param initial_means: set of k initial means :type initial_means: sequence of vectors :param normalise: should vectors be normalised to length 1 :type normalise: boolean :param svd_dimensions: number of dimensions to use in reducing vector dimensionsionality with SVD :type svd_dimensions: int :param rng: random number generator (or None) :type rng: Random """ VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) self._num_means = num_means self._distance = distance self._max_difference = conv_test assert not initial_means or len(initial_means) == num_means self._means = initial_means assert repeats >= 1 assert not (initial_means and repeats > 1) self._repeats = repeats if rng: self._rng = rng else: self._rng = random.Random()
def __init__( self, num_means, distance, repeats=1, conv_test=1e-6, initial_means=None, normalise=False, svd_dimensions=None, rng=None, avoid_empty_clusters=False, ): """ :param num_means: the number of means to use (may use fewer) :type num_means: int :param distance: measure of distance between two vectors :type distance: function taking two vectors and returing a float :param repeats: number of randomised clustering trials to use :type repeats: int :param conv_test: maximum variation in mean differences before deemed convergent :type conv_test: number :param initial_means: set of k initial means :type initial_means: sequence of vectors :param normalise: should vectors be normalised to length 1 :type normalise: boolean :param svd_dimensions: number of dimensions to use in reducing vector dimensionsionality with SVD :type svd_dimensions: int :param rng: random number generator (or None) :type rng: Random :param avoid_empty_clusters: include current centroid in computation of next one; avoids undefined behavior when clusters become empty :type avoid_empty_clusters: boolean """ VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) self._num_means = num_means self._distance = distance self._max_difference = conv_test assert not initial_means or len(initial_means) == num_means self._means = initial_means assert repeats >= 1 assert not (initial_means and repeats > 1) self._repeats = repeats self._rng = rng if rng else random.Random() self._avoid_empty_clusters = avoid_empty_clusters
def cluster(self, vectors, assign_clusters=False, trace=False): # stores the merge order self._dendrogram = Dendrogram( [numpy.array(vector, numpy.float64) for vector in vectors]) return VectorSpaceClusterer.cluster(self, vectors, assign_clusters, trace)
def __init__(self, num_clusters=1, normalise=True, svd_dimensions=None): VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) self._num_clusters = num_clusters self._dendrogram = None self._groups_values = None
def cluster(self, vectors, assign_clusters=False, trace=False): # stores the merge order self._dendrogram = Dendrogram( [numpy.array(vector, numpy.float64) for vector in vectors] ) return VectorSpaceClusterer.cluster(self, vectors, assign_clusters, trace)