Esempio n. 1
0
def cause(n, k, p1, p2):
    """Sample ``n`` points from a randomly parameterised ``k``-component mixture.

    The component means are standard-normal draws multiplied by ``p1``; the
    spreads are ``(p2 * z + 1) ** 2`` for standard-normal ``z``; the weights
    are uniform draws normalised to sum to one.  Whatever ``sample(n)``
    returns is handed to ``scale`` and that result is returned.

    NOTE(review): ``covars_`` is the attribute name used by the legacy
    ``GMM`` estimator -- confirm that the ``GaussianMixture`` in scope here
    actually reads it when sampling.
    """
    mixture = GaussianMixture(k)

    # The three draws happen in the same order as before (means, spreads,
    # weights) so the global random stream is consumed identically.
    centers = np.random.randn(k, 1)
    mixture.means_ = p1 * centers

    spreads = p2 * np.random.randn(k, 1) + 1
    mixture.covars_ = np.power(abs(spreads), 2)

    raw_weights = abs(np.random.rand(k, 1))
    mixture.weights_ = raw_weights / sum(raw_weights)

    drawn = mixture.sample(n)
    return scale(drawn)
Esempio n. 2
0
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    """Build an unfitted mixture model with randomly assigned parameters.

    n_mix:            number of mixture components
    n_features:       dimensionality of each component mean
    covariance_type:  forwarded to ``GaussianMixture`` and ``make_covar_matrix``
    prng:             seed or random-state object, normalised through
                      ``check_random_state``

    Returns the model with ``means_``, ``covars_`` and ``weights_`` set by
    hand; no fitting is performed here.
    """
    rng = check_random_state(prng)
    model = GaussianMixture(n_mix, covariance_type=covariance_type)

    # Integer means are drawn first, then the covariances are built, then the
    # weights are drawn -- the same sequence as the statements they replace.
    mean_shape = (n_mix, n_features)
    model.means_ = rng.randint(-20, 20, mean_shape)
    model.covars_ = make_covar_matrix(covariance_type, n_mix, n_features)

    raw_weights = rng.rand(n_mix)
    model.weights_ = normalized(raw_weights)
    return model
def gmm_cause(points, k=2, p1=3, p2=4):
    """Init a root cause with a Gaussian Mixture Model w/ a spherical covariance type.

    points:  number of samples requested from the mixture
    k:       number of mixture components
    p1:      multiplier applied to the standard-normal draws used as means
    p2:      multiplier applied to the standard-normal draws used for spreads

    Returns the first element of ``sample(points)`` flattened to one dimension.
    """
    mixture = GMM(k, covariance_type="spherical")

    # The model is fitted on throw-away standard-normal data; every fitted
    # parameter assigned below overwrites what the fit produced.
    # NOTE(review): presumably the fit only exists so that sample() accepts
    # the estimator -- confirm.
    warmup = np.random.randn(300, 1)
    mixture.fit(warmup)

    mixture.means_ = p1 * np.random.randn(k, 1)

    spreads = p2 * np.random.randn(k, 1) + 1
    mixture.covars_ = np.power(abs(spreads), 2)

    raw_weights = abs(np.random.rand(k))
    mixture.weights_ = raw_weights / sum(raw_weights)

    drawn = mixture.sample(points)
    return drawn[0].reshape(-1)
Esempio n. 4
0
    def _set_GMMs(self, statesSequence):
        '''
        build n-state GMMs with scikit-learn's classes.
        copy means etc. from self.weights and self.means and self.covars
        '''
        
        
        if statesSequence==None:
            sys.exit('no state sequence')
        
        numMixtures = self._get_num_mixtures(statesSequence)
        
        means = numpy.empty((self.n, numMixtures, self.numDimensions))
        
        weights = numpy.ones((self.n, numMixtures),dtype=numpy.double)
        
        # init covars
        covars = [[ numpy.matrix(numpy.eye(self.numDimensions, self.numDimensions)) for j in xrange(numMixtures)] for i in xrange(self.n)]
               
        for i in range(len(statesSequence) ):
            state  = statesSequence[i] 
            
            if ParametersAlgo.FOR_MAKAM:
                for (numMixture, weight, mixture) in state.mixtures:
                    
                    weights[i,numMixture-1] = weight
                    
                    means[i,numMixture-1,:] = mixture.mean.vector
                    
                    variance_ = mixture.var.vector
                    for k in  range(len( variance_) ):
                        covars[i][numMixture-1][k,k] = variance_[k]
            
            elif ParametersAlgo.FOR_JINGJU:
                gmm_ = state.mixtures
            
                for numMixture in range(gmm_.n_components):
                    weights[i,numMixture] = gmm_.weights_[numMixture]
                    
                    means[i,numMixture,:] = gmm_.means_[numMixture]
                    
                    variance_ = gmm_.covars_[numMixture]
                    
                    for k in  range(len( variance_) ):
                        covars[i][numMixture][k,k] = variance_[k]
        
        ####### put into GMM models 
        self.GMMs = numpy.empty(self.n, dtype=GMM_)

        
  
        
        for stateIdx in range(self.n):
            curr_GMM = GMM_(covariance_type='diag', n_components=numMixtures)
            curr_GMM.means_ = means[stateIdx]
            
            curr_GMM.covars_  = numpy.zeros((numMixtures, self.numDimensions))
            for m_idx in range(numMixtures):
                for d_idx in range(self.numDimensions):
                    a = covars[stateIdx][m_idx][d_idx,d_idx]
                    curr_GMM.covars_[m_idx,d_idx] = a
            
            curr_GMM.weights_ = weights[stateIdx]
            
            
            curr_GMM.precisions_cholesky_ = _compute_precision_cholesky(curr_GMM.covars_, curr_GMM.covariance_type)
            self.GMMs[stateIdx] = curr_GMM    
Esempio n. 5
0
def fit_gmm_to_points(points,
                      n_components,
                      mdl,
                      ps=None,
                      num_iter=100,
                      covariance_type='full',
                      min_covar=0.001,
                      init_centers=None,
                      force_radii=-1.0,
                      force_weight=-1.0,
                      mass_multiplier=1.0):
    """Fit a GMM to some points and decorate IMP particles with the result.

    Returns the score and the Akaike score.  The Akaike information
    criterion is a measure of the relative quality of the GMM that takes
    into account the parsimony and the goodness of the fit.
    If no particles are provided, they will be created.

    points:            list of coordinates (python)
    n_components:      number of gaussians to create
    mdl:               IMP Model
    ps:                list of particles to be decorated; new particles are
                       appended to it when it holds fewer than n_components.
                       None (the default) starts from a fresh empty list.
    num_iter:          number of EM iterations
    covariance_type:   covar type for the gaussians, passed to scikit-learn.
                       options: 'full', 'diag', 'spherical'
    min_covar:         assign a minimum value to covariance term. That is used
                       to have more spherical shaped gaussians (only passed to
                       the legacy ``GMM`` class)
    init_centers:      initial coordinates of the GMM; None or an empty
                       sequence means "let scikit-learn initialise"
    force_radii:       fix the radii (spheres only); -1.0 disables
    force_weight:      fix the weights; -1.0 disables
    mass_multiplier:   multiply the weights of all the gaussians by this value

    Returns a tuple ``(score, akaikescore)``.
    """
    # A fresh list per call: the former mutable default ``ps=[]`` was appended
    # to below, so particles leaked from one call into the next.
    if ps is None:
        ps = []
    # ``init_centers == []`` is unreliable for numpy arrays (element-wise
    # comparison), so test for presence by length instead.
    have_init_centers = init_centers is not None and len(init_centers) > 0

    new_sklearn = False
    try:
        from sklearn.mixture import GMM
    except ImportError:
        from sklearn.mixture import GaussianMixture
        new_sklearn = True

    print('creating GMM with n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
    if new_sklearn:
        # aic() calls size() on points, so it needs to be a numpy array,
        # not a list
        points = np.array(points)
        weights_init = precisions_init = None
        if force_radii != -1.0:
            print('warning: radii can no longer be forced, but setting '
                  'initial values to ', force_radii)
            # NOTE(review): this builds one row of 3 values per component,
            # which looks like the 'diag' layout -- confirm it is valid for
            # the covariance_type callers use together with force_radii.
            precisions_init = np.array([[1./force_radii]*3
                                       for i in range(n_components)])
        if force_weight != -1.0:
            print('warning: weights can no longer be forced, but setting '
                  'initial values to ', force_weight)
            weights_init = np.array([force_weight]*n_components)

        gmm = GaussianMixture(n_components=n_components,
                              max_iter=num_iter,
                              covariance_type=covariance_type,
                              weights_init=weights_init,
                              precisions_init=precisions_init,
                              means_init=init_centers if have_init_centers
                                              else None)
    else:
        # 'm', 'c', 'w' select which of means / covariances / weights the
        # legacy GMM initialises and updates; forced quantities are left out.
        params='m'
        init_params='m'
        if force_radii==-1.0:
            params+='c'
            init_params+='c'
        else:
            covariance_type='spherical'
            print('forcing spherical with radii',force_radii)

        if force_weight==-1.0:
            params+='w'
            init_params+='w'
        else:
            print('forcing weights to be',force_weight)

        gmm = GMM(n_components=n_components, n_iter=num_iter,
                  covariance_type=covariance_type, min_covar=min_covar,
                  params=params, init_params=init_params)
        if force_weight!=-1.0:
            gmm.weights_=np.array([force_weight]*n_components)
        if force_radii!=-1.0:
            gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
        if have_init_centers:
            gmm.means_=init_centers
    print('fitting')
    model=gmm.fit(points)
    score=gmm.score(points)
    akaikescore=model.aic(points)

    ### convert format to core::Gaussian
    covars = gmm.covariances_ if new_sklearn else gmm.covars_
    for ng in range(n_components):
        covar=covars[ng]
        if covar.size==1:
            # a single variance per component (spherical GaussianMixture):
            # expand it to an isotropic 3x3 covariance matrix
            covar=np.diag([float(covar)]*3).tolist()
        elif covar.size==3:
            # three per-axis variances: expand to a 3x3 diagonal matrix
            covar=np.diag(covar).tolist()
        else:
            covar=covar.tolist()
        center=list(gmm.means_[ng])
        weight=mass_multiplier*gmm.weights_[ng]
        if ng>=len(ps):
            ps.append(IMP.Particle(mdl))
        shape=IMP.algebra.get_gaussian_from_covariance(covar,IMP.algebra.Vector3D(center))
        g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
        IMP.atom.Mass.setup_particle(ps[ng],weight)
        # particle radius: square root of the largest variance
        IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))

    return (score,akaikescore)
Esempio n. 6
0
def _sklearn_has_gaussian_mixture():
    """Return True when the installed scikit-learn is version 0.18 or newer."""
    return packaging.version.parse(sklearn.__version__) \
        >= packaging.version.parse('0.18')


def clustering_gmm(data,
                   n_clusters,
                   tol=1e-7,
                   min_covar=None,
                   scale='logicle'):
    """
    Find clusters in an array using a Gaussian Mixture Model.

    Before clustering, `data` can be automatically rescaled as specified by
    the `scale` argument.

    Parameters
    ----------
    data : FCSData or array_like
        Data to cluster.
    n_clusters : int
        Number of clusters to find.
    tol : float, optional
        Tolerance for convergence. Directly passed to either
        ``GaussianMixture`` or ``GMM``, depending on ``scikit-learn``'s
        version.
    min_covar : float, optional
        Value added to the diagonal of each initial covariance matrix. If
        ``scikit-learn``'s version is older than 0.18, `min_covar` is also
        passed directly to ``GMM``. When None, it defaults to 1e-3 for
        ``scikit-learn`` 0.18 or newer and to 5e-5 otherwise.
    scale : str, optional
        Rescaling applied to `data` before performing clustering. Can be
        either ``linear`` (no rescaling), ``log``, or ``logicle``.

    Returns
    -------
    labels : list
        List of length N with one label per element in `data`, assigning
        ``data[i]`` to cluster ``labels[i]``.

    Raises
    ------
    ValueError
        If `scale` is not one of the supported values.

    Notes
    -----
    A Gaussian Mixture Model finds clusters by fitting a linear combination
    of `n_clusters` Gaussian probability density functions (pdf) to `data`
    using Expectation Maximization (EM).

    This method can be fairly sensitive to the initial parameter choice. To
    generate a reasonable set of initial conditions, `clustering_gmm`
    first divides all points in `data` into `n_clusters` groups of the
    same size based on their Euclidean distance to the minimum value. Then,
    for each group, the 50% of samples at the edges of the group (in that
    distance ordering) are discarded. The mean and covariance are
    calculated from the remaining samples of each group, and used as
    initial conditions for the GMM EM algorithm.

    `clustering_gmm` internally uses a `GaussianMixture` object from the
    ``scikit-learn`` library (``GMM`` if ``scikit-learn``'s version is
    lower than 0.18), with full covariance matrices for each cluster. For
    more information, consult ``scikit-learn``'s documentation.

    """

    # Initialize min_covar parameter
    # Parameter is initialized differently depending on scikit's version
    if min_covar is None:
        min_covar = 1e-3 if _sklearn_has_gaussian_mixture() else 5e-5

    # Copy events before rescaling
    data = data.copy()

    # Apply rescaling
    if scale == 'linear':
        # No rescaling
        pass
    elif scale == 'log':
        # Logarithm of zero and negatives is undefined. Therefore, saturate
        # any non-positives to a small positive value.
        # The machine epsilon `eps` is the smallest number such that
        # `1.0 + eps != 1.0`. For a 64-bit floating point, `eps ~= 2.2e-16`,
        # so 1e-15 is used as a small positive floor.
        data[data < 1e-15] = 1e-15
        # Rescale
        data = np.log10(data)
    elif scale == 'logicle':
        # Use the logicle transform class in the plot module, and transform
        # data one channel at a time.
        for ch in range(data.shape[1]):
            # We need a transformation from "data value" to "display scale"
            # units. To do so, we use an inverse logicle transformation.
            t = FlowCal.plot._LogicleTransform(data=data,
                                               channel=ch).inverted()
            data[:, ch] = t.transform_non_affine(data[:, ch],
                                                 mask_out_of_range=False)
    else:
        raise ValueError("scale {} not supported".format(scale))

    ###
    # Parameter initialization
    ###
    weights = np.tile(1.0 / n_clusters, n_clusters)
    means = []
    covars = []

    # Calculate distance to minimum value. Then, sort based on this distance.
    dist = np.sum((data - np.min(data, axis=0))**2., axis=1)
    sorted_idx = np.argsort(dist)

    # Expected number of elements per cluster
    n_per_cluster = data.shape[0] / float(n_clusters)

    # Get means and covariances per cluster
    # We will just use a fraction of ``1 - discard_frac`` of the data.
    # Data at the edges that actually corresponds to another cluster can
    # really mess up the final result.
    discard_frac = 0.5
    for i in range(n_clusters):
        # Keep the central part of the i-th group, trimming discard_frac / 2
        # from each end.
        il = int((i + discard_frac / 2) * n_per_cluster)
        ih = int((i + 1 - discard_frac / 2) * n_per_cluster)
        sorted_idx_cluster = sorted_idx[il:ih]
        data_cluster = data[sorted_idx_cluster]
        # Calculate means and covariances
        means.append(np.mean(data_cluster, axis=0))
        if data.shape[1] == 1:
            # np.cov returns a 0-d result for a single channel
            cov = np.cov(data_cluster.T).reshape(1, 1)
        else:
            cov = np.cov(data_cluster.T)
        # Add small number to diagonal to avoid near-singular covariances
        cov += np.eye(data.shape[1]) * min_covar
        covars.append(cov)
    # Means should be an array
    means = np.array(means)

    ###
    # Run Gaussian Mixture Model Clustering
    ###

    if _sklearn_has_gaussian_mixture():

        # GaussianMixture uses precisions, the inverse of covariances.
        # To get the inverse, we solve the linear equation C*P = I. We also
        # use the fact that C is positive definite.
        precisions = [
            scipy.linalg.solve(c, np.eye(c.shape[0]), assume_a='pos')
            for c in covars
        ]
        precisions = np.array(precisions)

        # Initialize GaussianMixture object
        gmm = GaussianMixture(n_components=n_clusters,
                              tol=tol,
                              covariance_type='full',
                              weights_init=weights,
                              means_init=means,
                              precisions_init=precisions,
                              max_iter=500)

    else:
        # Initialize GMM object
        gmm = GMM(n_components=n_clusters,
                  tol=tol,
                  min_covar=min_covar,
                  covariance_type='full',
                  params='mc',
                  init_params='')

        # Set initial parameters
        # (the attribute is ``weights_``; the former ``weight_`` spelling
        # meant the prepared weights were never handed to the model)
        gmm.weights_ = weights
        gmm.means_ = means
        gmm.covars_ = covars

    # Fit
    gmm.fit(data)

    # Get labels by sampling from the responsibilities
    # This avoids the complete elimination of a cluster if two or more
    # clusters have very similar means.
    resp = gmm.predict_proba(data)
    labels = [np.random.choice(range(n_clusters), p=ri) for ri in resp]

    return labels
Esempio n. 7
0
def fit_gmm_to_points(points,
                      n_components,
                      mdl,
                      ps=None,
                      num_iter=100,
                      covariance_type='full',
                      min_covar=0.001,
                      init_centers=None,
                      force_radii=-1.0,
                      force_weight=-1.0,
                      mass_multiplier=1.0):
    """Fit a GMM to some points and decorate IMP particles with the result.

    Returns the score and the Akaike score.  The Akaike information
    criterion is a measure of the relative quality of the GMM that takes
    into account the parsimony and the goodness of the fit.
    If no particles are provided, they will be created.

    points:            list of coordinates (python)
    n_components:      number of gaussians to create
    mdl:               IMP Model
    ps:                list of particles to be decorated; new particles are
                       appended to it when it holds fewer than n_components.
                       None (the default) starts from a fresh empty list.
    num_iter:          number of EM iterations
    covariance_type:   covar type for the gaussians, passed to scikit-learn.
                       options: 'full', 'diag', 'spherical'
    min_covar:         assign a minimum value to covariance term. That is used
                       to have more spherical shaped gaussians (only passed to
                       the legacy ``GMM`` class)
    init_centers:      initial coordinates of the GMM; None or an empty
                       sequence means "let scikit-learn initialise"
    force_radii:       fix the radii (spheres only); -1.0 disables
    force_weight:      fix the weights; -1.0 disables
    mass_multiplier:   multiply the weights of all the gaussians by this value

    Returns a tuple ``(score, akaikescore)``.
    """
    # A fresh list per call: the former mutable default ``ps=[]`` was appended
    # to below, so particles leaked from one call into the next.
    if ps is None:
        ps = []
    # ``init_centers == []`` is unreliable for numpy arrays (element-wise
    # comparison), so test for presence by length instead.
    have_init_centers = init_centers is not None and len(init_centers) > 0

    new_sklearn = False
    try:
        from sklearn.mixture import GMM
    except ImportError:
        from sklearn.mixture import GaussianMixture
        new_sklearn = True

    print('creating GMM with n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
    if new_sklearn:
        # aic() calls size() on points, so it needs to be
        # a numpy array, not a list
        points = np.array(points)
        weights_init = precisions_init = None
        if force_radii != -1.0:
            print('warning: radii can no longer be forced, but setting '
                  'initial values to ', force_radii)
            # NOTE(review): this builds one row of 3 values per component,
            # which looks like the 'diag' layout -- confirm it is valid for
            # the covariance_type callers use together with force_radii.
            precisions_init = np.array([[1./force_radii]*3
                                       for i in range(n_components)])
        if force_weight != -1.0:
            print('warning: weights can no longer be forced, but setting '
                  'initial values to ', force_weight)
            weights_init = np.array([force_weight]*n_components)

        gmm = GaussianMixture(n_components=n_components,
                              max_iter=num_iter,
                              covariance_type=covariance_type,
                              weights_init=weights_init,
                              precisions_init=precisions_init,
                              means_init=init_centers if have_init_centers
                                              else None)
    else:
        # 'm', 'c', 'w' select which of means / covariances / weights the
        # legacy GMM initialises and updates; forced quantities are left out.
        params='m'
        init_params='m'
        if force_radii==-1.0:
            params+='c'
            init_params+='c'
        else:
            covariance_type='spherical'
            print('forcing spherical with radii',force_radii)

        if force_weight==-1.0:
            params+='w'
            init_params+='w'
        else:
            print('forcing weights to be',force_weight)

        gmm = GMM(n_components=n_components, n_iter=num_iter,
                  covariance_type=covariance_type, min_covar=min_covar,
                  params=params, init_params=init_params)
        if force_weight!=-1.0:
            gmm.weights_=np.array([force_weight]*n_components)
        if force_radii!=-1.0:
            gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
        if have_init_centers:
            gmm.means_=init_centers
    print('fitting')
    model=gmm.fit(points)
    score=gmm.score(points)
    akaikescore=model.aic(points)

    ### convert format to core::Gaussian
    covars = gmm.covariances_ if new_sklearn else gmm.covars_
    for ng in range(n_components):
        covar=covars[ng]
        if covar.size==1:
            # a single variance per component (spherical GaussianMixture):
            # expand it to an isotropic 3x3 covariance matrix
            covar=np.diag([float(covar)]*3).tolist()
        elif covar.size==3:
            # three per-axis variances: expand to a 3x3 diagonal matrix
            covar=np.diag(covar).tolist()
        else:
            covar=covar.tolist()
        center=list(gmm.means_[ng])
        weight=mass_multiplier*gmm.weights_[ng]
        if ng>=len(ps):
            ps.append(IMP.Particle(mdl))
        shape=IMP.algebra.get_gaussian_from_covariance(covar,IMP.algebra.Vector3D(center))
        g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
        IMP.atom.Mass.setup_particle(ps[ng],weight)
        # particle radius: square root of the largest variance
        IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))

    return (score,akaikescore)